{"train_lr": 0.00014956716896441166, "train_min_lr": 0.00014956716896441166, "train_loss": 0.38172032931246436, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.23971221050940072, "epoch": 0, "n_parameters": 303924416} {"train_lr": 0.00044966335363898674, "train_min_lr": 0.00044966335363898674, "train_loss": 0.36654696435643697, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03155437959298396, "epoch": 1, "n_parameters": 303924416} {"train_lr": 0.000749759538313562, "train_min_lr": 0.000749759538313562, "train_loss": 0.36472014661949986, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014652351293569574, "epoch": 2, "n_parameters": 303924416} {"train_lr": 0.0010498557229881365, "train_min_lr": 0.0010498557229881365, "train_loss": 0.3633722287351982, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010068480908954278, "epoch": 3, "n_parameters": 303924416} {"train_lr": 0.0013499519076627113, "train_min_lr": 0.0013499519076627113, "train_loss": 0.3622303508425084, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007884505883945772, "epoch": 4, "n_parameters": 303924416} {"train_lr": 0.0016500480923372883, "train_min_lr": 0.0016500480923372883, "train_loss": 0.36097650309630597, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0074350301158399535, "epoch": 5, "n_parameters": 303924416} {"train_lr": 0.0019501442770118633, "train_min_lr": 0.0019501442770118633, "train_loss": 0.3579726323085383, "train_loss_scale": 104395.48717948717, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008951898503045622, "epoch": 6, "n_parameters": 303924416} {"train_lr": 0.002250240461686437, "train_min_lr": 0.002250240461686437, "train_loss": 0.35499510920952815, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009583615609862579, "epoch": 7, "n_parameters": 303924416} {"train_lr": 0.0025503366463610127, "train_min_lr": 0.0025503366463610127, "train_loss": 0.3512055580802739, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010603275367966255, "epoch": 8, "n_parameters": 303924416} {"train_lr": 0.002850432831035588, "train_min_lr": 0.002850432831035588, "train_loss": 0.34679530464256036, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010993302437082794, "epoch": 9, "n_parameters": 303924416} {"train_lr": 0.0029999990319348056, "train_min_lr": 0.0029999990319348056, "train_loss": 0.3418944622031771, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010258701589639084, "epoch": 10, "n_parameters": 303924416} {"train_lr": 0.0029999932048716707, "train_min_lr": 0.0029999932048716707, "train_loss": 0.3382800502810054, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010518391097251039, "epoch": 11, "n_parameters": 303924416} {"train_lr": 0.0029999815414149186, "train_min_lr": 0.0029999815414149186, "train_loss": 0.3333229383226866, "train_loss_scale": 155017.84615384616, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008058239971526349, "epoch": 12, "n_parameters": 303924416} {"train_lr": 0.002999964041610077, "train_min_lr": 0.002999964041610077, "train_loss": 0.32920888677322996, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007733738693134047, "epoch": 13, "n_parameters": 303924416} {"train_lr": 0.0029999407055254644, "train_min_lr": 0.0029999407055254644, "train_loss": 0.3257497643144467, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007664547230188663, "epoch": 14, "n_parameters": 303924416} {"train_lr": 0.002999911533252189, "train_min_lr": 0.002999911533252189, "train_loss": 0.3227131732124596, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007522892051686843, "epoch": 15, "n_parameters": 303924416} {"train_lr": 0.0029998765249041335, "train_min_lr": 0.0029998765249041335, "train_loss": 0.3200697694839432, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007385246916853178, "epoch": 16, "n_parameters": 303924416} {"train_lr": 0.002999835680617969, "train_min_lr": 0.002999835680617969, "train_loss": 0.3177476077615164, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007328961226635446, "epoch": 17, "n_parameters": 303924416} {"train_lr": 0.002999789000553154, "train_min_lr": 0.002999789000553154, "train_loss": 0.31564193350585323, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00708890847939377, "epoch": 18, "n_parameters": 303924416} {"train_lr": 0.002999736484891923, "train_min_lr": 0.002999736484891923, "train_loss": 0.31380801471487546, "train_loss_scale": 464633.4358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006981406917568678, "epoch": 19, "n_parameters": 303924416} {"train_lr": 0.0029996781338392934, "train_min_lr": 0.0029996781338392934, "train_loss": 0.31224844848008776, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007003525955578647, "epoch": 20, "n_parameters": 303924416} {"train_lr": 0.0029996139476230668, "train_min_lr": 0.0029996139476230668, "train_loss": 0.31078938161058783, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00695618047999839, "epoch": 21, "n_parameters": 303924416} {"train_lr": 0.0029995439264938278, "train_min_lr": 0.0029995439264938278, "train_loss": 0.30938695053844595, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0067688261355774906, "epoch": 22, "n_parameters": 303924416} {"train_lr": 0.002999468070724929, "train_min_lr": 0.002999468070724929, "train_loss": 0.30825408183348674, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006766736490998226, "epoch": 23, "n_parameters": 303924416} {"train_lr": 0.0029993863806125134, "train_min_lr": 0.0029993863806125134, "train_loss": 0.30710840572674686, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006671832687854289, "epoch": 24, "n_parameters": 303924416} {"train_lr": 0.0029992988564754917, "train_min_lr": 0.0029992988564754917, "train_loss": 0.30615074896158123, "train_loss_scale": 714174.358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0067500483799869055, "epoch": 25, "n_parameters": 303924416} {"train_lr": 0.0029992054986555587, "train_min_lr": 0.0029992054986555587, "train_loss": 0.3051374747376077, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006682100622222209, "epoch": 26, "n_parameters": 303924416} {"train_lr": 0.002999106307517179, "train_min_lr": 0.002999106307517179, "train_loss": 0.30420883593316644, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006663047427980182, "epoch": 27, "n_parameters": 303924416} {"train_lr": 0.00299900128344759, "train_min_lr": 0.00299900128344759, "train_loss": 0.30342967769740004, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006625481854336193, "epoch": 28, "n_parameters": 303924416} {"train_lr": 0.002998890426856795, "train_min_lr": 0.002998890426856795, "train_loss": 0.30260364719941163, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006557869305535673, "epoch": 29, "n_parameters": 303924416} {"train_lr": 0.0029987737381775878, "train_min_lr": 0.0029987737381775878, "train_loss": 0.301889934985994, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00655998084664297, "epoch": 30, "n_parameters": 303924416} {"train_lr": 0.002998651217865505, "train_min_lr": 0.002998651217865505, "train_loss": 0.3012511334919299, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006603596369341875, "epoch": 31, "n_parameters": 303924416} {"train_lr": 0.0029985228663988615, "train_min_lr": 0.0029985228663988615, "train_loss": 0.3005921800519364, "train_loss_scale": 2046739.6923076923, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006542124322806604, "epoch": 32, "n_parameters": 303924416} {"train_lr": 0.002998388684278744, "train_min_lr": 0.002998388684278744, "train_loss": 0.30005016021478254, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006564827567229095, "epoch": 33, "n_parameters": 303924416} {"train_lr": 0.00299824867202899, "train_min_lr": 0.00299824867202899, "train_loss": 0.29946245968891066, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006558452391077598, "epoch": 34, "n_parameters": 303924416} {"train_lr": 0.0029981028301961947, "train_min_lr": 0.0029981028301961947, "train_loss": 0.298919905633785, "train_loss_scale": 1690492.717948718, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 35, "n_parameters": 303924416} {"train_lr": 0.0029979511593497257, "train_min_lr": 0.0029979511593497257, "train_loss": 0.2983709098920465, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006494175605416202, "epoch": 36, "n_parameters": 303924416} {"train_lr": 0.002997793660081702, "train_min_lr": 0.002997793660081702, "train_loss": 0.29785470066902536, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006485527411151009, "epoch": 37, "n_parameters": 303924416} {"train_lr": 0.002997630333006995, "train_min_lr": 0.002997630333006995, "train_loss": 0.29742878956648594, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006470472178756235, "epoch": 38, "n_parameters": 303924416} {"train_lr": 0.002997461178763217, "train_min_lr": 0.002997461178763217, "train_loss": 0.2969667099069995, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006470574669014567, "epoch": 39, "n_parameters": 303924416} {"train_lr": 0.0029972861980107515, "train_min_lr": 0.0029972861980107515, "train_loss": 0.29660754918884963, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006544498119276399, "epoch": 40, "n_parameters": 303924416} {"train_lr": 0.0029971053914327112, "train_min_lr": 0.0029971053914327112, "train_loss": 0.29619660186402214, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006524226338698123, "epoch": 41, "n_parameters": 303924416} {"train_lr": 0.0029969187597349537, "train_min_lr": 0.0029969187597349537, "train_loss": 0.2957636254141107, "train_loss_scale": 2073626.2564102565, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006484731970629536, "epoch": 42, "n_parameters": 303924416} {"train_lr": 0.0029967263036460904, "train_min_lr": 0.0029967263036460904, "train_loss": 0.2954202954364845, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006554801918518467, "epoch": 43, "n_parameters": 303924416} {"train_lr": 0.0029965280239174527, "train_min_lr": 0.0029965280239174527, "train_loss": 0.29505378412357414, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006484324802071429, "epoch": 44, "n_parameters": 303924416} {"train_lr": 0.0029963239213231223, "train_min_lr": 0.0029963239213231223, "train_loss": 0.29474306407456213, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006509769403447325, "epoch": 45, "n_parameters": 303924416} {"train_lr": 0.002996113996659908, "train_min_lr": 0.002996113996659908, "train_loss": 0.2944371265042812, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00653144239465921, "epoch": 46, "n_parameters": 303924416} {"train_lr": 0.0029958982507473477, "train_min_lr": 0.0029958982507473477, "train_loss": 0.2940899709938094, "train_loss_scale": 1230060.3076923077, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 47, "n_parameters": 303924416} {"train_lr": 0.0029956766844277003, "train_min_lr": 0.0029956766844277003, "train_loss": 0.29374789038840204, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006472462116597364, "epoch": 48, "n_parameters": 303924416} {"train_lr": 0.002995449298565954, "train_min_lr": 0.002995449298565954, "train_loss": 0.293459344897061, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006505653515864068, "epoch": 49, "n_parameters": 303924416} {"train_lr": 0.0029952160940498185, "train_min_lr": 0.0029952160940498185, "train_loss": 0.2932004473404959, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006503304551677922, "epoch": 50, "n_parameters": 303924416} {"train_lr": 0.002994977071789708, "train_min_lr": 0.002994977071789708, "train_loss": 0.2929141990130003, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006524027440135773, "epoch": 51, "n_parameters": 303924416} {"train_lr": 0.002994732232718759, "train_min_lr": 0.002994732232718759, "train_loss": 0.2926651844933916, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00652846090722447, "epoch": 52, "n_parameters": 303924416} {"train_lr": 0.0029944815777928125, "train_min_lr": 0.0029944815777928125, "train_loss": 0.29244929201638276, "train_loss_scale": 1485482.6666666667, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006584923918872403, "epoch": 53, "n_parameters": 303924416} {"train_lr": 0.0029942251079904166, "train_min_lr": 0.0029942251079904166, "train_loss": 0.2922123864454289, "train_loss_scale": 1925750.1538461538, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 54, "n_parameters": 303924416} {"train_lr": 0.002993962824312818, "train_min_lr": 0.002993962824312818, "train_loss": 0.29187152263684535, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006485544149584781, "epoch": 55, "n_parameters": 303924416} {"train_lr": 0.002993694727783965, "train_min_lr": 0.002993694727783965, "train_loss": 0.29162167991774207, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065553119832960265, "epoch": 56, "n_parameters": 303924416} {"train_lr": 0.0029934208194504915, "train_min_lr": 0.0029934208194504915, "train_loss": 0.2914558191013594, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006539256783584371, "epoch": 57, "n_parameters": 303924416} {"train_lr": 0.0029931411003817263, "train_min_lr": 0.0029931411003817263, "train_loss": 0.2912547507812866, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006564397381272358, "epoch": 58, "n_parameters": 303924416} {"train_lr": 0.0029928555716696795, "train_min_lr": 0.0029928555716696795, "train_loss": 0.29101764763860655, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00651926544154636, "epoch": 59, "n_parameters": 303924416} {"train_lr": 0.002992564234429045, "train_min_lr": 0.002992564234429045, "train_loss": 0.29078555268605644, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006514452111262541, "epoch": 60, "n_parameters": 303924416} {"train_lr": 0.0029922670897972003, "train_min_lr": 0.0029922670897972003, "train_loss": 0.29055701222760266, "train_loss_scale": 1236781.9487179487, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 61, "n_parameters": 303924416} {"train_lr": 0.002991964138934168, "train_min_lr": 0.002991964138934168, "train_loss": 0.29038986360701996, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006536950584118947, "epoch": 62, "n_parameters": 303924416} {"train_lr": 0.0029916553830226754, "train_min_lr": 0.0029916553830226754, "train_loss": 0.2901717074191532, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006511359184514731, "epoch": 63, "n_parameters": 303924416} {"train_lr": 0.0029913408232680786, "train_min_lr": 0.0029913408232680786, "train_loss": 0.2900079893670642, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00654626781723868, "epoch": 64, "n_parameters": 303924416} {"train_lr": 0.0029910204608984106, "train_min_lr": 0.0029910204608984106, "train_loss": 0.2898402036143801, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006508852491000046, "epoch": 65, "n_parameters": 303924416} {"train_lr": 0.002990694297164359, "train_min_lr": 0.002990694297164359, "train_loss": 0.28967062055539244, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006530556103919083, "epoch": 66, "n_parameters": 303924416} {"train_lr": 0.0029903623333392484, "train_min_lr": 0.0029903623333392484, "train_loss": 0.2894076771962528, "train_loss_scale": 1219977.8461538462, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006477660311457629, "epoch": 67, "n_parameters": 303924416} {"train_lr": 0.002990024570719051, "train_min_lr": 0.002990024570719051, "train_loss": 0.2892833585456873, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006508379080631317, "epoch": 68, "n_parameters": 303924416} {"train_lr": 0.002989681010622383, "train_min_lr": 0.002989681010622383, "train_loss": 0.2891693823457433, "train_loss_scale": 1589668.1025641025, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 69, "n_parameters": 303924416} {"train_lr": 0.002989331654390483, "train_min_lr": 0.002989331654390483, "train_loss": 0.28906921765659577, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006603867627489261, "epoch": 70, "n_parameters": 303924416} {"train_lr": 0.0029889765033872333, "train_min_lr": 0.0029889765033872333, "train_loss": 0.28886235873991, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065139897707969146, "epoch": 71, "n_parameters": 303924416} {"train_lr": 0.00298861555899912, "train_min_lr": 0.00298861555899912, "train_loss": 0.28867658408573615, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006541265853835891, "epoch": 72, "n_parameters": 303924416} {"train_lr": 0.002988248822635262, "train_min_lr": 0.002988248822635262, "train_loss": 0.2885432825065576, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00652776207948008, "epoch": 73, "n_parameters": 303924416} {"train_lr": 0.0029878762957273783, "train_min_lr": 0.0029878762957273783, "train_loss": 0.2884130419548362, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006539115906119919, "epoch": 74, "n_parameters": 303924416} {"train_lr": 0.002987497979729805, "train_min_lr": 0.002987497979729805, "train_loss": 0.2882960920090763, "train_loss_scale": 1125874.8717948718, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006577526841563388, "epoch": 75, "n_parameters": 303924416} {"train_lr": 0.002987113876119467, "train_min_lr": 0.002987113876119467, "train_loss": 0.2881280946999024, "train_loss_scale": 1929110.9743589743, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 76, "n_parameters": 303924416} {"train_lr": 0.002986723986395889, "train_min_lr": 0.002986723986395889, "train_loss": 0.2879918774064535, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065027945689880885, "epoch": 77, "n_parameters": 303924416} {"train_lr": 0.0029863283120811898, "train_min_lr": 0.0029863283120811898, "train_loss": 0.2878410844251705, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006490396156620521, "epoch": 78, "n_parameters": 303924416} {"train_lr": 0.002985926854720063, "train_min_lr": 0.002985926854720063, "train_loss": 0.28773523496392256, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006527870835270733, "epoch": 79, "n_parameters": 303924416} {"train_lr": 0.002985519615879786, "train_min_lr": 0.002985519615879786, "train_loss": 0.28763389926797783, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006556712849161182, "epoch": 80, "n_parameters": 303924416} {"train_lr": 0.002985106597150196, "train_min_lr": 0.002985106597150196, "train_loss": 0.28749432161342925, "train_loss_scale": 766267.0769230769, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 81, "n_parameters": 303924416} {"train_lr": 0.0029846878001437093, "train_min_lr": 0.0029846878001437093, "train_loss": 0.28739436741512364, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006550950829524738, "epoch": 82, "n_parameters": 303924416} {"train_lr": 0.002984263226495282, "train_min_lr": 0.002984263226495282, "train_loss": 0.2872512638150977, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006498449886492334, "epoch": 83, "n_parameters": 303924416} {"train_lr": 0.002983832877862442, "train_min_lr": 0.002983832877862442, "train_loss": 0.28718677315359503, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006534730996160458, "epoch": 84, "n_parameters": 303924416} {"train_lr": 0.002983396755925252, "train_min_lr": 0.002983396755925252, "train_loss": 0.28704494037008726, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006545760597173984, "epoch": 85, "n_parameters": 303924416} {"train_lr": 0.0029829548623863107, "train_min_lr": 0.0029829548623863107, "train_loss": 0.28694356280343175, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006523475395694662, "epoch": 86, "n_parameters": 303924416} {"train_lr": 0.0029825071989707597, "train_min_lr": 0.0029825071989707597, "train_loss": 0.28682671954186684, "train_loss_scale": 591504.4102564103, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006531813040148849, "epoch": 87, "n_parameters": 303924416} {"train_lr": 0.002982053767426249, "train_min_lr": 0.002982053767426249, "train_loss": 0.28673509034949046, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065353075833394164, "epoch": 88, "n_parameters": 303924416} {"train_lr": 0.0029815945695229615, "train_min_lr": 0.0029815945695229615, "train_loss": 0.28661820379933584, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006535311847423704, "epoch": 89, "n_parameters": 303924416} {"train_lr": 0.002981129607053593, "train_min_lr": 0.002981129607053593, "train_loss": 0.286471084297563, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006516824822681837, "epoch": 90, "n_parameters": 303924416} {"train_lr": 0.0029806588818333314, "train_min_lr": 0.0029806588818333314, "train_loss": 0.2864039986251065, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006526257245777509, "epoch": 91, "n_parameters": 303924416} {"train_lr": 0.002980182395699876, "train_min_lr": 0.002980182395699876, "train_loss": 0.2862906542379791, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006508108052329566, "epoch": 92, "n_parameters": 303924416} {"train_lr": 0.0029797001505133982, "train_min_lr": 0.0029797001505133982, "train_loss": 0.28621691511346936, "train_loss_scale": 546133.3333333334, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 93, "n_parameters": 303924416} {"train_lr": 0.002979212148156572, "train_min_lr": 0.002979212148156572, "train_loss": 0.28614988137717146, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006523231869766441, "epoch": 94, "n_parameters": 303924416} {"train_lr": 0.0029787183905345444, "train_min_lr": 0.0029787183905345444, "train_loss": 0.28605029543037885, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006540652509265317, "epoch": 95, "n_parameters": 303924416} {"train_lr": 0.0029782188795749115, "train_min_lr": 0.0029782188795749115, "train_loss": 0.285956729334803, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065371338546705935, "epoch": 96, "n_parameters": 303924416} {"train_lr": 0.0029777136172277536, "train_min_lr": 0.0029777136172277536, "train_loss": 0.28581402201850253, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006535751703314674, "epoch": 97, "n_parameters": 303924416} {"train_lr": 0.0029772026054655956, "train_min_lr": 0.0029772026054655956, "train_loss": 0.2857966417064651, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006555021974819306, "epoch": 98, "n_parameters": 303924416} {"train_lr": 0.002976685846283399, "train_min_lr": 0.002976685846283399, "train_loss": 0.2856989066110542, "train_loss_scale": 811638.1538461539, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006565343842316323, "epoch": 99, "n_parameters": 303924416} {"train_lr": 0.002976163341698581, "train_min_lr": 0.002976163341698581, "train_loss": 0.28562526400977123, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006541377360311647, "epoch": 100, "n_parameters": 303924416} {"train_lr": 0.0029756350937509696, "train_min_lr": 0.0029756350937509696, "train_loss": 0.285542341047683, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006523840871150008, "epoch": 101, "n_parameters": 303924416} {"train_lr": 0.0029751011045028287, "train_min_lr": 0.0029751011045028287, "train_loss": 0.28544092349385697, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065247069847268555, "epoch": 102, "n_parameters": 303924416} {"train_lr": 0.0029745613760388313, "train_min_lr": 0.0029745613760388313, "train_loss": 0.2853100116227347, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006505967799920398, "epoch": 103, "n_parameters": 303924416} {"train_lr": 0.0029740159104660532, "train_min_lr": 0.0029740159104660532, "train_loss": 0.2852841754718564, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006520555081526534, "epoch": 104, "n_parameters": 303924416} {"train_lr": 0.00297346470991397, "train_min_lr": 0.00297346470991397, "train_loss": 0.2852255744405855, "train_loss_scale": 1193091.282051282, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006535230411921078, "epoch": 105, "n_parameters": 303924416} {"train_lr": 0.0029729077765344473, "train_min_lr": 0.0029729077765344473, "train_loss": 0.2851588042685762, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006541558907021028, "epoch": 106, "n_parameters": 303924416} {"train_lr": 0.0029723451125017353, "train_min_lr": 0.0029723451125017353, "train_loss": 0.2850841914679712, "train_loss_scale": 1522451.6923076923, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 107, "n_parameters": 303924416} {"train_lr": 0.002971776720012444, "train_min_lr": 0.002971776720012444, "train_loss": 0.2850606763490643, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006644078670666577, "epoch": 108, "n_parameters": 303924416} {"train_lr": 0.0029712026012855583, "train_min_lr": 0.0029712026012855583, "train_loss": 0.28498716498963916, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006551834280029512, "epoch": 109, "n_parameters": 303924416} {"train_lr": 0.002970622758562414, "train_min_lr": 0.002970622758562414, "train_loss": 0.28485655527896225, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006537470088985104, "epoch": 110, "n_parameters": 303924416} {"train_lr": 0.0029700371941066978, "train_min_lr": 0.0029700371941066978, "train_loss": 0.28479792445432395, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006526344676669209, "epoch": 111, "n_parameters": 303924416} {"train_lr": 0.002969445910204429, "train_min_lr": 0.002969445910204429, "train_loss": 0.28472331618496144, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006531984691555874, "epoch": 112, "n_parameters": 303924416} {"train_lr": 0.002968848909163951, "train_min_lr": 0.002968848909163951, "train_loss": 0.28465772661058086, "train_loss_scale": 1193091.282051282, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006548077036710218, "epoch": 113, "n_parameters": 303924416} {"train_lr": 0.0029682461933159374, "train_min_lr": 0.0029682461933159374, "train_loss": 0.28461774968160075, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006546002872383747, "epoch": 114, "n_parameters": 303924416} {"train_lr": 0.0029676377650133612, "train_min_lr": 0.0029676377650133612, "train_loss": 0.2845240763675135, "train_loss_scale": 937668.9230769231, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 115, "n_parameters": 303924416} {"train_lr": 0.0029670236266315076, "train_min_lr": 0.0029670236266315076, "train_loss": 0.2844791383369086, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006532865512268379, "epoch": 116, "n_parameters": 303924416} {"train_lr": 0.002966403780567945, "train_min_lr": 0.002966403780567945, "train_loss": 0.28443925817294097, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006557786373051409, "epoch": 117, "n_parameters": 303924416} {"train_lr": 0.002965778229242529, "train_min_lr": 0.002965778229242529, "train_loss": 0.2843511580961207, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006546893563026037, "epoch": 118, "n_parameters": 303924416} {"train_lr": 0.0029651469750973905, "train_min_lr": 0.0029651469750973905, "train_loss": 0.2844567477124003, "train_loss_scale": 517566.358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 119, "n_parameters": 303924416} {"train_lr": 0.0029645100205969127, "train_min_lr": 0.0029645100205969127, "train_loss": 0.2843004421492179, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006546009799095396, "epoch": 120, "n_parameters": 303924416} {"train_lr": 0.002963867368227746, "train_min_lr": 0.002963867368227746, "train_loss": 0.28418301461706275, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065551155921406085, "epoch": 121, "n_parameters": 303924416} {"train_lr": 0.002963219020498775, "train_min_lr": 0.002963219020498775, "train_loss": 0.284125889878338, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00655104143348021, "epoch": 122, "n_parameters": 303924416} {"train_lr": 0.002962564979941127, "train_min_lr": 0.002962564979941127, "train_loss": 0.28404695747527653, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065188718547758, "epoch": 123, "n_parameters": 303924416} {"train_lr": 0.0029619052491081507, "train_min_lr": 0.0029619052491081507, "train_loss": 0.28400323821160084, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006567808645717704, "epoch": 124, "n_parameters": 303924416} {"train_lr": 0.0029612398305754115, "train_min_lr": 0.0029612398305754115, "train_loss": 0.2839850473922128, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006572009571708548, "epoch": 125, "n_parameters": 303924416} {"train_lr": 0.0029605687269406663, "train_min_lr": 0.0029605687269406663, "train_loss": 0.2838823489743309, "train_loss_scale": 423463.3846153846, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006552328380982941, "epoch": 126, "n_parameters": 303924416} {"train_lr": 0.0029598919408238863, "train_min_lr": 0.0029598919408238863, "train_loss": 0.2838624030417309, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00653652682637748, "epoch": 127, "n_parameters": 303924416} {"train_lr": 0.0029592094748672145, "train_min_lr": 0.0029592094748672145, "train_loss": 0.2838344923966827, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006563836690754845, "epoch": 128, "n_parameters": 303924416} {"train_lr": 0.0029585213317349685, "train_min_lr": 0.0029585213317349685, "train_loss": 0.2837561033606434, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006535323191947566, "epoch": 129, "n_parameters": 303924416} {"train_lr": 0.002957827514113639, "train_min_lr": 0.002957827514113639, "train_loss": 0.28372027432749, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006596911577686954, "epoch": 130, "n_parameters": 303924416} {"train_lr": 0.002957128024711851, "train_min_lr": 0.002957128024711851, "train_loss": 0.28362503678251344, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006548236874947086, "epoch": 131, "n_parameters": 303924416} {"train_lr": 0.00295642286626039, "train_min_lr": 0.00295642286626039, "train_loss": 0.2835681392656018, "train_loss_scale": 631834.2564102564, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006535994223295114, "epoch": 132, "n_parameters": 303924416} {"train_lr": 0.0029557120415121658, "train_min_lr": 0.0029557120415121658, "train_loss": 0.2835901477672637, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006526224697247529, "epoch": 133, "n_parameters": 303924416} {"train_lr": 0.0029549955532422074, "train_min_lr": 0.0029549955532422074, "train_loss": 0.2834751095282487, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006535351322665333, "epoch": 134, "n_parameters": 303924416} {"train_lr": 0.0029542734042476613, "train_min_lr": 0.0029542734042476613, "train_loss": 0.28346531574005407, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00656129845764297, "epoch": 135, "n_parameters": 303924416} {"train_lr": 0.0029535455973477634, "train_min_lr": 0.0029535455973477634, "train_loss": 0.2834195462383855, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006564506278850903, "epoch": 136, "n_parameters": 303924416} {"train_lr": 0.0029528121353838465, "train_min_lr": 0.0029528121353838465, "train_loss": 0.28344498151459563, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065448684528326756, "epoch": 137, "n_parameters": 303924416} {"train_lr": 0.002952073021219313, "train_min_lr": 0.002952073021219313, "train_loss": 0.28332159292096126, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006546345099102324, "epoch": 138, "n_parameters": 303924416} {"train_lr": 0.002951328257739638, "train_min_lr": 0.002951328257739638, "train_loss": 0.283317285667484, "train_loss_scale": 1882059.4871794872, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006570997744953881, "epoch": 139, "n_parameters": 303924416} {"train_lr": 0.002950577847852346, "train_min_lr": 0.002950577847852346, "train_loss": 0.2832409103329365, "train_loss_scale": 1095627.4871794872, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 140, "n_parameters": 303924416} {"train_lr": 0.0029498217944870075, "train_min_lr": 0.0029498217944870075, "train_loss": 0.2831740142741742, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065010338066480095, "epoch": 141, "n_parameters": 303924416} {"train_lr": 0.002949060100595227, "train_min_lr": 0.002949060100595227, "train_loss": 0.2831733564559657, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065324150745231565, "epoch": 142, "n_parameters": 303924416} {"train_lr": 0.0029482927691506233, "train_min_lr": 0.0029482927691506233, "train_loss": 0.28317768066130483, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006640308808224897, "epoch": 143, "n_parameters": 303924416} {"train_lr": 0.0029475198031488313, "train_min_lr": 0.0029475198031488313, "train_loss": 0.2830960236442013, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006566213433427784, "epoch": 144, "n_parameters": 303924416} {"train_lr": 0.002946741205607474, "train_min_lr": 0.002946741205607474, "train_loss": 0.2830409013534872, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00655787310246617, "epoch": 145, "n_parameters": 303924416} {"train_lr": 0.002945956979566174, "train_min_lr": 0.002945956979566174, "train_loss": 0.28301231742788774, "train_loss_scale": 1283833.435897436, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 146, "n_parameters": 303924416} {"train_lr": 0.0029451671280865087, "train_min_lr": 0.0029451671280865087, "train_loss": 0.28298000193451744, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006548425931937228, "epoch": 147, "n_parameters": 303924416} {"train_lr": 0.002944371654252032, "train_min_lr": 0.002944371654252032, "train_loss": 0.28295064052172864, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00652777968505875, "epoch": 148, "n_parameters": 303924416} {"train_lr": 0.0029435705611682425, "train_min_lr": 0.0029435705611682425, "train_loss": 0.2828386349555774, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006549585328544849, "epoch": 149, "n_parameters": 303924416} {"train_lr": 0.0029427638519625714, "train_min_lr": 0.0029427638519625714, "train_loss": 0.2828258113631119, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065378012726656515, "epoch": 150, "n_parameters": 303924416} {"train_lr": 0.002941951529784382, "train_min_lr": 0.002941951529784382, "train_loss": 0.2827946013645627, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006540939026774886, "epoch": 151, "n_parameters": 303924416} {"train_lr": 0.00294113359780495, "train_min_lr": 0.00294113359780495, "train_loss": 0.2827617755511967, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006542623954980324, "epoch": 152, "n_parameters": 303924416} {"train_lr": 0.0029403100592174437, "train_min_lr": 0.0029403100592174437, "train_loss": 0.2827517151957951, "train_loss_scale": 2003049.0256410257, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00656119854344676, "epoch": 153, "n_parameters": 303924416} {"train_lr": 0.0029394809172369253, "train_min_lr": 0.0029394809172369253, "train_loss": 0.28270997524631614, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006541022568200834, "epoch": 154, "n_parameters": 303924416} {"train_lr": 0.002938646175100337, "train_min_lr": 0.002938646175100337, "train_loss": 0.28263842622534585, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00656900628326604, "epoch": 155, "n_parameters": 303924416} {"train_lr": 0.0029378058360664777, "train_min_lr": 0.0029378058360664777, "train_loss": 0.28262514658033466, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00665329283891389, "epoch": 156, "n_parameters": 303924416} {"train_lr": 0.002936959903415989, "train_min_lr": 0.002936959903415989, "train_loss": 0.2825945016468326, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006552034165137089, "epoch": 157, "n_parameters": 303924416} {"train_lr": 0.0029361083804513677, "train_min_lr": 0.0029361083804513677, "train_loss": 0.2825541728331397, "train_loss_scale": 1351049.8461538462, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 158, "n_parameters": 303924416} {"train_lr": 0.0029352512704969233, "train_min_lr": 0.0029352512704969233, "train_loss": 0.2824935263160091, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006573741609314218, "epoch": 159, "n_parameters": 303924416} {"train_lr": 0.0029343885768987757, "train_min_lr": 0.0029343885768987757, "train_loss": 0.2824265859179342, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006575629519573294, "epoch": 160, "n_parameters": 303924416} {"train_lr": 0.002933520303024848, "train_min_lr": 0.002933520303024848, "train_loss": 0.28238880552518636, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065309657843616335, "epoch": 161, "n_parameters": 303924416} {"train_lr": 0.0029326464522648503, "train_min_lr": 0.0029326464522648503, "train_loss": 0.28238716241246903, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065775005137906056, "epoch": 162, "n_parameters": 303924416} {"train_lr": 0.0029317670280302522, "train_min_lr": 0.0029317670280302522, "train_loss": 0.28235298627987504, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00656032801952022, "epoch": 163, "n_parameters": 303924416} {"train_lr": 0.0029308820337542985, "train_min_lr": 0.0029308820337542985, "train_loss": 0.282339555491956, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006557399736872564, "epoch": 164, "n_parameters": 303924416} {"train_lr": 0.0029299914728919654, "train_min_lr": 0.0029299914728919654, "train_loss": 0.2823363520258751, "train_loss_scale": 917504.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006574494028404259, "epoch": 165, "n_parameters": 303924416} {"train_lr": 0.0029290953489199754, "train_min_lr": 0.0029290953489199754, "train_loss": 0.2823306675254105, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00659577750439684, "epoch": 166, "n_parameters": 303924416} {"train_lr": 0.00292819366533675, "train_min_lr": 0.00292819366533675, "train_loss": 0.282291719248184, "train_loss_scale": 584782.7692307692, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 167, "n_parameters": 303924416} {"train_lr": 0.0029272864256624344, "train_min_lr": 0.0029272864256624344, "train_loss": 0.28225444422330326, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006575444028175508, "epoch": 168, "n_parameters": 303924416} {"train_lr": 0.002926373633438852, "train_min_lr": 0.002926373633438852, "train_loss": 0.28218731824428034, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065572504581023865, "epoch": 169, "n_parameters": 303924416} {"train_lr": 0.002925455292229509, "train_min_lr": 0.002925455292229509, "train_loss": 0.2821802085575958, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00659028400416271, "epoch": 170, "n_parameters": 303924416} {"train_lr": 0.0029245314056195694, "train_min_lr": 0.0029245314056195694, "train_loss": 0.28213009622115165, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006583568971389188, "epoch": 171, "n_parameters": 303924416} {"train_lr": 0.0029236019772158526, "train_min_lr": 0.0029236019772158526, "train_loss": 0.2821001352336353, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065506531003241735, "epoch": 172, "n_parameters": 303924416} {"train_lr": 0.0029226670106468075, "train_min_lr": 0.0029226670106468075, "train_loss": 0.282044806281845, "train_loss_scale": 772988.717948718, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006568138559277241, "epoch": 173, "n_parameters": 303924416} {"train_lr": 0.0029217265095625097, "train_min_lr": 0.0029217265095625097, "train_loss": 0.28207603389791286, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006585037222323127, "epoch": 174, "n_parameters": 303924416} {"train_lr": 0.002920780477634638, "train_min_lr": 0.002920780477634638, "train_loss": 0.2820561516176288, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006602271689842336, "epoch": 175, "n_parameters": 303924416} {"train_lr": 0.002919828918556457, "train_min_lr": 0.002919828918556457, "train_loss": 0.2820273282466074, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00659133653747491, "epoch": 176, "n_parameters": 303924416} {"train_lr": 0.0029188718360428187, "train_min_lr": 0.0029188718360428187, "train_loss": 0.28199130799084043, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006593933720022249, "epoch": 177, "n_parameters": 303924416} {"train_lr": 0.0029179092338301395, "train_min_lr": 0.0029179092338301395, "train_loss": 0.2819511973394606, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006569032258807848, "epoch": 178, "n_parameters": 303924416} {"train_lr": 0.002916941115676371, "train_min_lr": 0.002916941115676371, "train_loss": 0.2819265096496122, "train_loss_scale": 751143.3846153846, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 179, "n_parameters": 303924416} {"train_lr": 0.0029159674853610168, "train_min_lr": 0.0029159674853610168, "train_loss": 0.28184678578355277, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00655939773722098, "epoch": 180, "n_parameters": 303924416} {"train_lr": 0.0029149883466850833, "train_min_lr": 0.0029149883466850833, "train_loss": 0.28184095514305413, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065592100996022615, "epoch": 181, "n_parameters": 303924416} {"train_lr": 0.0029140037034710927, "train_min_lr": 0.0029140037034710927, "train_loss": 0.2817948459683416, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006589937732757953, "epoch": 182, "n_parameters": 303924416} {"train_lr": 0.0029130135595630516, "train_min_lr": 0.0029130135595630516, "train_loss": 0.2817733681623418, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006586830489910566, "epoch": 183, "n_parameters": 303924416} {"train_lr": 0.0029120179188264392, "train_min_lr": 0.0029120179188264392, "train_loss": 0.28175686925458604, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006595818724375791, "epoch": 184, "n_parameters": 303924416} {"train_lr": 0.002911016785148203, "train_min_lr": 0.002911016785148203, "train_loss": 0.28174901578145534, "train_loss_scale": 606628.1025641026, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006622211630038248, "epoch": 185, "n_parameters": 303924416} {"train_lr": 0.0029100101624367283, "train_min_lr": 0.0029100101624367283, "train_loss": 0.28176779913393635, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006578540684691129, "epoch": 186, "n_parameters": 303924416} {"train_lr": 0.002908998054621824, "train_min_lr": 0.002908998054621824, "train_loss": 0.28166094798451435, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006561582155812245, "epoch": 187, "n_parameters": 303924416} {"train_lr": 0.0029079804656547237, "train_min_lr": 0.0029079804656547237, "train_loss": 0.2816531723723389, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006578366157228652, "epoch": 188, "n_parameters": 303924416} {"train_lr": 0.0029069573995080546, "train_min_lr": 0.0029069573995080546, "train_loss": 0.28167730420290565, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006599433042514973, "epoch": 189, "n_parameters": 303924416} {"train_lr": 0.002905928860175819, "train_min_lr": 0.002905928860175819, "train_loss": 0.28159421858771777, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006580212017676483, "epoch": 190, "n_parameters": 303924416} {"train_lr": 0.002904894851673399, "train_min_lr": 0.002904894851673399, "train_loss": 0.2815384298133162, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006577199263772808, "epoch": 191, "n_parameters": 303924416} {"train_lr": 0.002903855378037524, "train_min_lr": 0.002903855378037524, "train_loss": 0.28157995746900827, "train_loss_scale": 1619915.4871794872, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 192, "n_parameters": 303924416} {"train_lr": 0.0029028104433262503, "train_min_lr": 0.0029028104433262503, "train_loss": 0.2815042473525048, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0066079336549871815, "epoch": 193, "n_parameters": 303924416} {"train_lr": 0.0029017600516189723, "train_min_lr": 0.0029017600516189723, "train_loss": 0.28148986395591724, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006592405868921047, "epoch": 194, "n_parameters": 303924416} {"train_lr": 0.0029007042070163655, "train_min_lr": 0.0029007042070163655, "train_loss": 0.28147992563063806, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006597944324871955, "epoch": 195, "n_parameters": 303924416} {"train_lr": 0.002899642913640412, "train_min_lr": 0.002899642913640412, "train_loss": 0.2814532818738371, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0065759743044439415, "epoch": 196, "n_parameters": 303924416} {"train_lr": 0.002898576175634354, "train_min_lr": 0.002898576175634354, "train_loss": 0.2814327919593033, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006603494885031325, "epoch": 197, "n_parameters": 303924416} {"train_lr": 0.002897503997162699, "train_min_lr": 0.002897503997162699, "train_loss": 0.28141486472808397, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006633825195678629, "epoch": 198, "n_parameters": 303924416} {"train_lr": 0.002896426382411189, "train_min_lr": 0.002896426382411189, "train_loss": 0.28134688281883985, "train_loss_scale": 1878698.6666666667, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006609752787671124, "epoch": 199, "n_parameters": 303924416} {"train_lr": 0.002895343335586782, "train_min_lr": 0.002895343335586782, "train_loss": 0.2813334212047406, "train_loss_scale": 1761069.9487179487, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 200, "n_parameters": 303924416} {"train_lr": 0.002894254860917656, "train_min_lr": 0.002894254860917656, "train_loss": 0.2813559399010279, "train_loss_scale": 823401.0256410256, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 201, "n_parameters": 303924416} {"train_lr": 0.0028931609626531673, "train_min_lr": 0.0028931609626531673, "train_loss": 0.2812661265966315, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006599435070828081, "epoch": 202, "n_parameters": 303924416} {"train_lr": 0.0028920616450638596, "train_min_lr": 0.0028920616450638596, "train_loss": 0.2812877881507843, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006604748436261733, "epoch": 203, "n_parameters": 303924416} {"train_lr": 0.0028909569124414124, "train_min_lr": 0.0028909569124414124, "train_loss": 0.2812757055448273, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006697023453083462, "epoch": 204, "n_parameters": 303924416} {"train_lr": 0.002889846769098671, "train_min_lr": 0.002889846769098671, "train_loss": 0.2812658434506888, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006644308420781715, "epoch": 205, "n_parameters": 303924416} {"train_lr": 0.0028887312193695805, "train_min_lr": 0.0028887312193695805, "train_loss": 0.2813297988405117, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006632309598036301, "epoch": 206, "n_parameters": 303924416} {"train_lr": 0.002887610267609204, "train_min_lr": 0.002887610267609204, "train_loss": 0.2812242873418981, "train_loss_scale": 534370.4615384615, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006607820976895686, "epoch": 207, "n_parameters": 303924416} {"train_lr": 0.002886483918193695, "train_min_lr": 0.002886483918193695, "train_loss": 0.2811608021744551, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006612282000088061, "epoch": 208, "n_parameters": 303924416} {"train_lr": 0.0028853521755202796, "train_min_lr": 0.0028853521755202796, "train_loss": 0.2811526563854363, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006621018352094465, "epoch": 209, "n_parameters": 303924416} {"train_lr": 0.0028842150440072253, "train_min_lr": 0.0028842150440072253, "train_loss": 0.281128682825977, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006635528936898574, "epoch": 210, "n_parameters": 303924416} {"train_lr": 0.0028830725280938638, "train_min_lr": 0.0028830725280938638, "train_loss": 0.28110503490703803, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006596954840904054, "epoch": 211, "n_parameters": 303924416} {"train_lr": 0.002881924632240516, "train_min_lr": 0.002881924632240516, "train_loss": 0.28110798792794156, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006660473509094654, "epoch": 212, "n_parameters": 303924416} {"train_lr": 0.002880771360928527, "train_min_lr": 0.002880771360928527, "train_loss": 0.2810548852579907, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006609876958044389, "epoch": 213, "n_parameters": 303924416} {"train_lr": 0.0028796127186602205, "train_min_lr": 0.0028796127186602205, "train_loss": 0.28104488644748926, "train_loss_scale": 1687131.8974358975, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00660586949939338, "epoch": 214, "n_parameters": 303924416} {"train_lr": 0.0028784487099588964, "train_min_lr": 0.0028784487099588964, "train_loss": 0.28105815032270187, "train_loss_scale": 1058658.4615384615, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 215, "n_parameters": 303924416} {"train_lr": 0.0028772793393687934, "train_min_lr": 0.0028772793393687934, "train_loss": 0.28101824148175997, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006626010115724057, "epoch": 216, "n_parameters": 303924416} {"train_lr": 0.002876104611455086, "train_min_lr": 0.002876104611455086, "train_loss": 0.2809957858975021, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0066085877568604285, "epoch": 217, "n_parameters": 303924416} {"train_lr": 0.002874924530803863, "train_min_lr": 0.002874924530803863, "train_loss": 0.28094532541954553, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006636334536895634, "epoch": 218, "n_parameters": 303924416} {"train_lr": 0.002873739102022118, "train_min_lr": 0.002873739102022118, "train_loss": 0.2809960369217711, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006794676687628127, "epoch": 219, "n_parameters": 303924416} {"train_lr": 0.0028725483297377154, "train_min_lr": 0.0028725483297377154, "train_loss": 0.2809039802194979, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006619465064245443, "epoch": 220, "n_parameters": 303924416} {"train_lr": 0.0028713522185993853, "train_min_lr": 0.0028713522185993853, "train_loss": 0.2809406416580224, "train_loss_scale": 1564461.9487179487, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 221, "n_parameters": 303924416} {"train_lr": 0.0028701507732766898, "train_min_lr": 0.0028701507732766898, "train_loss": 0.2808786350368068, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006618450367405342, "epoch": 222, "n_parameters": 303924416} {"train_lr": 0.002868943998460023, "train_min_lr": 0.002868943998460023, "train_loss": 0.2808668410662227, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006631206910615453, "epoch": 223, "n_parameters": 303924416} {"train_lr": 0.0028677318988605963, "train_min_lr": 0.0028677318988605963, "train_loss": 0.28083993376901323, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00664619106399373, "epoch": 224, "n_parameters": 303924416} {"train_lr": 0.0028665144792103924, "train_min_lr": 0.0028665144792103924, "train_loss": 0.2808588278742555, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00666111490677278, "epoch": 225, "n_parameters": 303924416} {"train_lr": 0.0028652917442621704, "train_min_lr": 0.0028652917442621704, "train_loss": 0.2807922486979992, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006597795496838024, "epoch": 226, "n_parameters": 303924416} {"train_lr": 0.0028640636987894296, "train_min_lr": 0.0028640636987894296, "train_loss": 0.2807755533587904, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006622223380523233, "epoch": 227, "n_parameters": 303924416} {"train_lr": 0.002862830347586419, "train_min_lr": 0.002862830347586419, "train_loss": 0.28075112911681527, "train_loss_scale": 835163.8974358974, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0066190303545087, "epoch": 228, "n_parameters": 303924416} {"train_lr": 0.002861591695468095, "train_min_lr": 0.002861591695468095, "train_loss": 0.2807461672569983, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0066313273422658825, "epoch": 229, "n_parameters": 303924416} {"train_lr": 0.0028603477472700974, "train_min_lr": 0.0028603477472700974, "train_loss": 0.28074274067349064, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006668301106681331, "epoch": 230, "n_parameters": 303924416} {"train_lr": 0.002859098507848755, "train_min_lr": 0.002859098507848755, "train_loss": 0.2807242320999742, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 231, "n_parameters": 303924416} {"train_lr": 0.0028578439820810507, "train_min_lr": 0.0028578439820810507, "train_loss": 0.28071959000319624, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006700593649815672, "epoch": 232, "n_parameters": 303924416} {"train_lr": 0.0028565841748646012, "train_min_lr": 0.0028565841748646012, "train_loss": 0.2806955677224323, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006668707587195035, "epoch": 233, "n_parameters": 303924416} {"train_lr": 0.0028553190911176384, "train_min_lr": 0.0028553190911176384, "train_loss": 0.28067495128235376, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006662638150513745, "epoch": 234, "n_parameters": 303924416} {"train_lr": 0.0028540487357790047, "train_min_lr": 0.0028540487357790047, "train_loss": 0.2806156452697439, "train_loss_scale": 367169.641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 235, "n_parameters": 303924416} {"train_lr": 0.0028527731138081117, "train_min_lr": 0.0028527731138081117, "train_loss": 0.28055550906962406, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006605549460241141, "epoch": 236, "n_parameters": 303924416} {"train_lr": 0.0028514922301849416, "train_min_lr": 0.0028514922301849416, "train_loss": 0.28059180009250456, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006663086045031937, "epoch": 237, "n_parameters": 303924416} {"train_lr": 0.002850206089910009, "train_min_lr": 0.002850206089910009, "train_loss": 0.280556848174176, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006634842385000621, "epoch": 238, "n_parameters": 303924416} {"train_lr": 0.0028489146980043545, "train_min_lr": 0.0028489146980043545, "train_loss": 0.2805640726702479, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0066400484811777296, "epoch": 239, "n_parameters": 303924416} {"train_lr": 0.0028476180595095237, "train_min_lr": 0.0028476180595095237, "train_loss": 0.2805289345023294, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006646339348756159, "epoch": 240, "n_parameters": 303924416} {"train_lr": 0.002846316179487536, "train_min_lr": 0.002846316179487536, "train_loss": 0.28048757093851096, "train_loss_scale": 311716.10256410256, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006637837596010799, "epoch": 241, "n_parameters": 303924416} {"train_lr": 0.0028450090630208814, "train_min_lr": 0.0028450090630208814, "train_loss": 0.2805147498326663, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006670978039694138, "epoch": 242, "n_parameters": 303924416} {"train_lr": 0.0028436967152124944, "train_min_lr": 0.0028436967152124944, "train_loss": 0.2804309187952477, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006655257953426395, "epoch": 243, "n_parameters": 303924416} {"train_lr": 0.0028423791411857206, "train_min_lr": 0.0028423791411857206, "train_loss": 0.2804969731383981, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00672302736590306, "epoch": 244, "n_parameters": 303924416} {"train_lr": 0.0028410563460843246, "train_min_lr": 0.0028410563460843246, "train_loss": 0.2804313922772566, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006643930087403323, "epoch": 245, "n_parameters": 303924416} {"train_lr": 0.0028397283350724365, "train_min_lr": 0.0028397283350724365, "train_loss": 0.2804686319238196, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006699564241362401, "epoch": 246, "n_parameters": 303924416} {"train_lr": 0.002838395113334564, "train_min_lr": 0.002838395113334564, "train_loss": 0.2803995429765051, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006665503168896509, "epoch": 247, "n_parameters": 303924416} {"train_lr": 0.0028370566860755502, "train_min_lr": 0.0028370566860755502, "train_loss": 0.2803733395561815, "train_loss_scale": 932627.6923076923, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006638637948363351, "epoch": 248, "n_parameters": 303924416} {"train_lr": 0.0028357130585205613, "train_min_lr": 0.0028357130585205613, "train_loss": 0.2804021837536054, "train_loss_scale": 631834.2564102564, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 249, "n_parameters": 303924416} {"train_lr": 0.0028343642359150677, "train_min_lr": 0.0028343642359150677, "train_loss": 0.28033940959721804, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006636998404223376, "epoch": 250, "n_parameters": 303924416} {"train_lr": 0.002833010223524816, "train_min_lr": 0.002833010223524816, "train_loss": 0.2803283955973501, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006662226194002403, "epoch": 251, "n_parameters": 303924416} {"train_lr": 0.0028316510266358143, "train_min_lr": 0.0028316510266358143, "train_loss": 0.28031131952332383, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006650958765441408, "epoch": 252, "n_parameters": 303924416} {"train_lr": 0.0028302866505543183, "train_min_lr": 0.0028302866505543183, "train_loss": 0.2803390245639886, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00670049098194935, "epoch": 253, "n_parameters": 303924416} {"train_lr": 0.002828917100606794, "train_min_lr": 0.002828917100606794, "train_loss": 0.28029124355182433, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0066714629378671255, "epoch": 254, "n_parameters": 303924416} {"train_lr": 0.0028275423821399106, "train_min_lr": 0.0028275423821399106, "train_loss": 0.28028296133001834, "train_loss_scale": 699050.6666666666, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 255, "n_parameters": 303924416} {"train_lr": 0.002826162500520514, "train_min_lr": 0.002826162500520514, "train_loss": 0.280260397389961, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0066541855742868325, "epoch": 256, "n_parameters": 303924416} {"train_lr": 0.002824777461135607, "train_min_lr": 0.002824777461135607, "train_loss": 0.28029623937805015, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006713973707519472, "epoch": 257, "n_parameters": 303924416} {"train_lr": 0.0028233872693923315, "train_min_lr": 0.0028233872693923315, "train_loss": 0.28037037276543486, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006823725367072397, "epoch": 258, "n_parameters": 303924416} {"train_lr": 0.0028219919307179283, "train_min_lr": 0.0028219919307179283, "train_loss": 0.2802639519945026, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0066787528220373085, "epoch": 259, "n_parameters": 303924416} {"train_lr": 0.0028205914505597556, "train_min_lr": 0.0028205914505597556, "train_loss": 0.28018786482966673, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0066514744733770685, "epoch": 260, "n_parameters": 303924416} {"train_lr": 0.002819185834385233, "train_min_lr": 0.002819185834385233, "train_loss": 0.28018990045604414, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006661797912480931, "epoch": 261, "n_parameters": 303924416} {"train_lr": 0.002817775087681821, "train_min_lr": 0.002817775087681821, "train_loss": 0.28015615647801984, "train_loss_scale": 860370.0512820513, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006674054539941538, "epoch": 262, "n_parameters": 303924416} {"train_lr": 0.0028163592159570175, "train_min_lr": 0.0028163592159570175, "train_loss": 0.28016475132869506, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0067132820646899445, "epoch": 263, "n_parameters": 303924416} {"train_lr": 0.0028149382247383407, "train_min_lr": 0.0028149382247383407, "train_loss": 0.2801813677413007, "train_loss_scale": 788112.4102564103, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 264, "n_parameters": 303924416} {"train_lr": 0.0028135121195732755, "train_min_lr": 0.0028135121195732755, "train_loss": 0.28015319996465665, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00670814123422576, "epoch": 265, "n_parameters": 303924416} {"train_lr": 0.002812080906029277, "train_min_lr": 0.002812080906029277, "train_loss": 0.28014213798675114, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006704681729062054, "epoch": 266, "n_parameters": 303924416} {"train_lr": 0.0028106445896937493, "train_min_lr": 0.0028106445896937493, "train_loss": 0.2800765519675154, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0066957889447131986, "epoch": 267, "n_parameters": 303924416} {"train_lr": 0.002809203176174018, "train_min_lr": 0.002809203176174018, "train_loss": 0.2800494307765546, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006694249310291922, "epoch": 268, "n_parameters": 303924416} {"train_lr": 0.0028077566710972965, "train_min_lr": 0.0028077566710972965, "train_loss": 0.28000181353388304, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0066803902378664, "epoch": 269, "n_parameters": 303924416} {"train_lr": 0.002806305080110684, "train_min_lr": 0.002806305080110684, "train_loss": 0.2800780417284188, "train_loss_scale": 569659.0769230769, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0066862906454704125, "epoch": 270, "n_parameters": 303924416} {"train_lr": 0.002804848408881137, "train_min_lr": 0.002804848408881137, "train_loss": 0.2800212802323632, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006662551500201703, "epoch": 271, "n_parameters": 303924416} {"train_lr": 0.0028033866630954372, "train_min_lr": 0.0028033866630954372, "train_loss": 0.2799783364350263, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006678874768579426, "epoch": 272, "n_parameters": 303924416} {"train_lr": 0.0028019198484601905, "train_min_lr": 0.0028019198484601905, "train_loss": 0.28000042783633733, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006695908468921119, "epoch": 273, "n_parameters": 303924416} {"train_lr": 0.0028004479707017717, "train_min_lr": 0.0028004479707017717, "train_loss": 0.27998276778425163, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006677105861644332, "epoch": 274, "n_parameters": 303924416} {"train_lr": 0.002798971035566338, "train_min_lr": 0.002798971035566338, "train_loss": 0.27996692873645, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006694261080179459, "epoch": 275, "n_parameters": 303924416} {"train_lr": 0.002797489048819781, "train_min_lr": 0.002797489048819781, "train_loss": 0.27997996541372955, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006721789585963751, "epoch": 276, "n_parameters": 303924416} {"train_lr": 0.002796002016247727, "train_min_lr": 0.002796002016247727, "train_loss": 0.27995718743962544, "train_loss_scale": 1139318.1538461538, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 277, "n_parameters": 303924416} {"train_lr": 0.0027945099436554794, "train_min_lr": 0.0027945099436554794, "train_loss": 0.27991275454405695, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006715382573206742, "epoch": 278, "n_parameters": 303924416} {"train_lr": 0.0027930128368680407, "train_min_lr": 0.0027930128368680407, "train_loss": 0.27995182702747673, "train_loss_scale": 707452.717948718, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 279, "n_parameters": 303924416} {"train_lr": 0.002791510701730047, "train_min_lr": 0.002791510701730047, "train_loss": 0.27994009715098983, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006739027025058675, "epoch": 280, "n_parameters": 303924416} {"train_lr": 0.0027900035441057827, "train_min_lr": 0.0027900035441057827, "train_loss": 0.27992705926478195, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006725395078926037, "epoch": 281, "n_parameters": 303924416} {"train_lr": 0.0027884913698791205, "train_min_lr": 0.0027884913698791205, "train_loss": 0.2798402465414256, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00667411401324595, "epoch": 282, "n_parameters": 303924416} {"train_lr": 0.002786974184953536, "train_min_lr": 0.002786974184953536, "train_loss": 0.2798791234142696, "train_loss_scale": 423463.3846153846, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 283, "n_parameters": 303924416} {"train_lr": 0.0027854519952520565, "train_min_lr": 0.0027854519952520565, "train_loss": 0.27996963857171625, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006823729846077278, "epoch": 284, "n_parameters": 303924416} {"train_lr": 0.002783924806717247, "train_min_lr": 0.002783924806717247, "train_loss": 0.27984539249565643, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0066778918755097455, "epoch": 285, "n_parameters": 303924416} {"train_lr": 0.0027823926253111963, "train_min_lr": 0.0027823926253111963, "train_loss": 0.2798189896081264, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0067097528998811655, "epoch": 286, "n_parameters": 303924416} {"train_lr": 0.0027808554570154715, "train_min_lr": 0.0027808554570154715, "train_loss": 0.27982112633971834, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00669026860776238, "epoch": 287, "n_parameters": 303924416} {"train_lr": 0.0027793133078311215, "train_min_lr": 0.0027793133078311215, "train_loss": 0.27978680594764554, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006766615042952486, "epoch": 288, "n_parameters": 303924416} {"train_lr": 0.0027777661837786348, "train_min_lr": 0.0027777661837786348, "train_loss": 0.27976447840830165, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00671003865919864, "epoch": 289, "n_parameters": 303924416} {"train_lr": 0.0027762140908979215, "train_min_lr": 0.0027762140908979215, "train_loss": 0.2797532627711264, "train_loss_scale": 517566.358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006725888011248735, "epoch": 290, "n_parameters": 303924416} {"train_lr": 0.002774657035248286, "train_min_lr": 0.002774657035248286, "train_loss": 0.2797655716019038, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006732384307095065, "epoch": 291, "n_parameters": 303924416} {"train_lr": 0.002773095022908419, "train_min_lr": 0.002773095022908419, "train_loss": 0.27973765393611616, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00671057067423438, "epoch": 292, "n_parameters": 303924416} {"train_lr": 0.00277152805997634, "train_min_lr": 0.00277152805997634, "train_loss": 0.2797009508746366, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0067158415062854495, "epoch": 293, "n_parameters": 303924416} {"train_lr": 0.002769956152569427, "train_min_lr": 0.002769956152569427, "train_loss": 0.2797004618586447, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00673845790935537, "epoch": 294, "n_parameters": 303924416} {"train_lr": 0.002768379306824332, "train_min_lr": 0.002768379306824332, "train_loss": 0.2796725058474411, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006724713564229508, "epoch": 295, "n_parameters": 303924416} {"train_lr": 0.002766797528897003, "train_min_lr": 0.002766797528897003, "train_loss": 0.27968555979000836, "train_loss_scale": 820040.2051282051, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006739447256012891, "epoch": 296, "n_parameters": 303924416} {"train_lr": 0.0027652108249626352, "train_min_lr": 0.0027652108249626352, "train_loss": 0.27972029217889005, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006751711584794789, "epoch": 297, "n_parameters": 303924416} {"train_lr": 0.002763619201215655, "train_min_lr": 0.002763619201215655, "train_loss": 0.27963166848983234, "train_loss_scale": 788952.6153846154, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 298, "n_parameters": 303924416} {"train_lr": 0.002762022663869706, "train_min_lr": 0.002762022663869706, "train_loss": 0.279662521266474, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00678439695501509, "epoch": 299, "n_parameters": 303924416} {"train_lr": 0.0027604212191575973, "train_min_lr": 0.0027604212191575973, "train_loss": 0.27976897361580855, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0068944442065026704, "epoch": 300, "n_parameters": 303924416} {"train_lr": 0.0027588148733313092, "train_min_lr": 0.0027588148733313092, "train_loss": 0.27964861625626397, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006771431665700407, "epoch": 301, "n_parameters": 303924416} {"train_lr": 0.002757203632661952, "train_min_lr": 0.002757203632661952, "train_loss": 0.279600362218797, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006692833153721996, "epoch": 302, "n_parameters": 303924416} {"train_lr": 0.002755587503439741, "train_min_lr": 0.002755587503439741, "train_loss": 0.2795797559247615, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0067345362323790025, "epoch": 303, "n_parameters": 303924416} {"train_lr": 0.002753966491973985, "train_min_lr": 0.002753966491973985, "train_loss": 0.2795957445781917, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0067460528986815075, "epoch": 304, "n_parameters": 303924416} {"train_lr": 0.0027523406045930456, "train_min_lr": 0.0027523406045930456, "train_loss": 0.27959984988093567, "train_loss_scale": 459592.2051282051, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00674080219752609, "epoch": 305, "n_parameters": 303924416} {"train_lr": 0.0027507098476443215, "train_min_lr": 0.0027507098476443215, "train_loss": 0.27951718298479533, "train_loss_scale": 513365.3333333333, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 306, "n_parameters": 303924416} {"train_lr": 0.0027490742274942233, "train_min_lr": 0.0027490742274942233, "train_loss": 0.2795366457251545, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006712940984214537, "epoch": 307, "n_parameters": 303924416} {"train_lr": 0.002747433750528143, "train_min_lr": 0.002747433750528143, "train_loss": 0.2795250259208469, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006766552957765854, "epoch": 308, "n_parameters": 303924416} {"train_lr": 0.0027457884231504442, "train_min_lr": 0.0027457884231504442, "train_loss": 0.27952054515182495, "train_loss_scale": 165940.5128205128, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 309, "n_parameters": 303924416} {"train_lr": 0.002744138251784411, "train_min_lr": 0.002744138251784411, "train_loss": 0.2794744030644114, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006715866323644057, "epoch": 310, "n_parameters": 303924416} {"train_lr": 0.00274248324287225, "train_min_lr": 0.00274248324287225, "train_loss": 0.2795344117330387, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006765667059554312, "epoch": 311, "n_parameters": 303924416} {"train_lr": 0.0027408234028750557, "train_min_lr": 0.0027408234028750557, "train_loss": 0.27948801587813366, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0067302833851546244, "epoch": 312, "n_parameters": 303924416} {"train_lr": 0.0027391587382727695, "train_min_lr": 0.0027391587382727695, "train_loss": 0.27945613235127753, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0067328694679487785, "epoch": 313, "n_parameters": 303924416} {"train_lr": 0.0027374892555641762, "train_min_lr": 0.0027374892555641762, "train_loss": 0.27940850945201534, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006696601047550734, "epoch": 314, "n_parameters": 303924416} {"train_lr": 0.002735814961266868, "train_min_lr": 0.002735814961266868, "train_loss": 0.2794344454752998, "train_loss_scale": 173502.35897435897, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0067359983252409175, "epoch": 315, "n_parameters": 303924416} {"train_lr": 0.002734135861917227, "train_min_lr": 0.002734135861917227, "train_loss": 0.2794102754217023, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006742329209839973, "epoch": 316, "n_parameters": 303924416} {"train_lr": 0.002732451964070391, "train_min_lr": 0.002732451964070391, "train_loss": 0.27940995564373833, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006782800756179943, "epoch": 317, "n_parameters": 303924416} {"train_lr": 0.0027307632743002247, "train_min_lr": 0.0027307632743002247, "train_loss": 0.279431868577376, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006775017178509957, "epoch": 318, "n_parameters": 303924416} {"train_lr": 0.0027290697991993085, "train_min_lr": 0.0027290697991993085, "train_loss": 0.279390316340141, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006730567614655369, "epoch": 319, "n_parameters": 303924416} {"train_lr": 0.0027273715453788993, "train_min_lr": 0.0027273715453788993, "train_loss": 0.2793490110227886, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006704923858006413, "epoch": 320, "n_parameters": 303924416} {"train_lr": 0.0027256685194689133, "train_min_lr": 0.0027256685194689133, "train_loss": 0.27931625653320974, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006718716852796765, "epoch": 321, "n_parameters": 303924416} {"train_lr": 0.0027239607281178977, "train_min_lr": 0.0027239607281178977, "train_loss": 0.27932656301149666, "train_loss_scale": 501602.46153846156, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0067353642288929755, "epoch": 322, "n_parameters": 303924416} {"train_lr": 0.002722248177992999, "train_min_lr": 0.002722248177992999, "train_loss": 0.27932975901804197, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006746048716684947, "epoch": 323, "n_parameters": 303924416} {"train_lr": 0.0027205308757799426, "train_min_lr": 0.0027205308757799426, "train_loss": 0.27928610268431026, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006720488855185417, "epoch": 324, "n_parameters": 303924416} {"train_lr": 0.002718808828183009, "train_min_lr": 0.002718808828183009, "train_loss": 0.2792583039143863, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006736038369126618, "epoch": 325, "n_parameters": 303924416} {"train_lr": 0.002717082041925007, "train_min_lr": 0.002717082041925007, "train_loss": 0.2792828762336658, "train_loss_scale": 385654.1538461539, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 326, "n_parameters": 303924416} {"train_lr": 0.0027153505237472383, "train_min_lr": 0.0027153505237472383, "train_loss": 0.27925154832132065, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006725009594960377, "epoch": 327, "n_parameters": 303924416} {"train_lr": 0.0027136142804094735, "train_min_lr": 0.0027136142804094735, "train_loss": 0.2792741285684781, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006852584447747527, "epoch": 328, "n_parameters": 303924416} {"train_lr": 0.0027118733186899478, "train_min_lr": 0.0027118733186899478, "train_loss": 0.279328135925775, "train_loss_scale": 234417.23076923078, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 329, "n_parameters": 303924416} {"train_lr": 0.0027101276453853035, "train_min_lr": 0.0027101276453853035, "train_loss": 0.27929934839574766, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006769698184843247, "epoch": 330, "n_parameters": 303924416} {"train_lr": 0.0027083772673105774, "train_min_lr": 0.0027083772673105774, "train_loss": 0.2792450603312598, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00678800515537986, "epoch": 331, "n_parameters": 303924416} {"train_lr": 0.0027066221912991715, "train_min_lr": 0.0027066221912991715, "train_loss": 0.27939612741988057, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00699713023361535, "epoch": 332, "n_parameters": 303924416} {"train_lr": 0.002704862424202841, "train_min_lr": 0.002704862424202841, "train_loss": 0.27925037550859344, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006733247031195042, "epoch": 333, "n_parameters": 303924416} {"train_lr": 0.0027030979728916386, "train_min_lr": 0.0027030979728916386, "train_loss": 0.2792597740637855, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006748628342906252, "epoch": 334, "n_parameters": 303924416} {"train_lr": 0.002701328844253914, "train_min_lr": 0.002701328844253914, "train_loss": 0.2791767124361239, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0067377975688148765, "epoch": 335, "n_parameters": 303924416} {"train_lr": 0.0026995550451962757, "train_min_lr": 0.0026995550451962757, "train_loss": 0.2792047555259883, "train_loss_scale": 236097.64102564103, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006782769122554992, "epoch": 336, "n_parameters": 303924416} {"train_lr": 0.002697776582643566, "train_min_lr": 0.002697776582643566, "train_loss": 0.27912758260619086, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006729578870969514, "epoch": 337, "n_parameters": 303924416} {"train_lr": 0.00269599346353883, "train_min_lr": 0.00269599346353883, "train_loss": 0.2791548226452552, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006773588646095819, "epoch": 338, "n_parameters": 303924416} {"train_lr": 0.0026942056948432926, "train_min_lr": 0.0026942056948432926, "train_loss": 0.27916368858840984, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006727788583912815, "epoch": 339, "n_parameters": 303924416} {"train_lr": 0.0026924132835363284, "train_min_lr": 0.0026924132835363284, "train_loss": 0.2790959398799504, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00675378900875027, "epoch": 340, "n_parameters": 303924416} {"train_lr": 0.002690616236615441, "train_min_lr": 0.002690616236615441, "train_loss": 0.2790938587530922, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0067573025783834355, "epoch": 341, "n_parameters": 303924416} {"train_lr": 0.0026888145610962336, "train_min_lr": 0.0026888145610962336, "train_loss": 0.27908849656784857, "train_loss_scale": 364649.0256410256, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0067785798783939425, "epoch": 342, "n_parameters": 303924416} {"train_lr": 0.0026870082640123717, "train_min_lr": 0.0026870082640123717, "train_loss": 0.2791372654804339, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006781630243401593, "epoch": 343, "n_parameters": 303924416} {"train_lr": 0.0026851973524155666, "train_min_lr": 0.0026851973524155666, "train_loss": 0.2790623710413153, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006732210378126742, "epoch": 344, "n_parameters": 303924416} {"train_lr": 0.0026833818333755464, "train_min_lr": 0.0026833818333755464, "train_loss": 0.27905363687433493, "train_loss_scale": 298272.8205128205, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 345, "n_parameters": 303924416} {"train_lr": 0.002681561713980024, "train_min_lr": 0.002681561713980024, "train_loss": 0.2790573897311846, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006746731304491942, "epoch": 346, "n_parameters": 303924416} {"train_lr": 0.002679737001334669, "train_min_lr": 0.002679737001334669, "train_loss": 0.27907794337265956, "train_loss_scale": 238618.2564102564, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 347, "n_parameters": 303924416} {"train_lr": 0.0026779077025630943, "train_min_lr": 0.0026779077025630943, "train_loss": 0.27917468764234143, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006887817758326538, "epoch": 348, "n_parameters": 303924416} {"train_lr": 0.002676073824806804, "train_min_lr": 0.002676073824806804, "train_loss": 0.27904073398214024, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0067883387897521835, "epoch": 349, "n_parameters": 303924416} {"train_lr": 0.0026742353752251882, "train_min_lr": 0.0026742353752251882, "train_loss": 0.27904549329123723, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006756167591680797, "epoch": 350, "n_parameters": 303924416} {"train_lr": 0.002672392360995473, "train_min_lr": 0.002672392360995473, "train_loss": 0.27900278390659833, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006751192423204581, "epoch": 351, "n_parameters": 303924416} {"train_lr": 0.002670544789312714, "train_min_lr": 0.002670544789312714, "train_loss": 0.27892658718300456, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006740785896396026, "epoch": 352, "n_parameters": 303924416} {"train_lr": 0.0026686926673897596, "train_min_lr": 0.0026686926673897596, "train_loss": 0.2789308470733559, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006735272107060808, "epoch": 353, "n_parameters": 303924416} {"train_lr": 0.0026668360024572186, "train_min_lr": 0.0026668360024572186, "train_loss": 0.2788573157830307, "train_loss_scale": 231896.61538461538, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006696778168985382, "epoch": 354, "n_parameters": 303924416} {"train_lr": 0.0026649748017634396, "train_min_lr": 0.0026649748017634396, "train_loss": 0.2789377939188853, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006809903131141208, "epoch": 355, "n_parameters": 303924416} {"train_lr": 0.002663109072574473, "train_min_lr": 0.002663109072574473, "train_loss": 0.278896252448575, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006769821810452506, "epoch": 356, "n_parameters": 303924416} {"train_lr": 0.00266123882217405, "train_min_lr": 0.00266123882217405, "train_loss": 0.27888611956344295, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006777520424340111, "epoch": 357, "n_parameters": 303924416} {"train_lr": 0.0026593640578635516, "train_min_lr": 0.0026593640578635516, "train_loss": 0.27899748012901116, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0070311778377240095, "epoch": 358, "n_parameters": 303924416} {"train_lr": 0.002657484786961986, "train_min_lr": 0.002657484786961986, "train_loss": 0.278874055446627, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006738106017180074, "epoch": 359, "n_parameters": 303924416} {"train_lr": 0.0026556010168059534, "train_min_lr": 0.0026556010168059534, "train_loss": 0.2788744827087682, "train_loss_scale": 356246.9743589744, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006775045071919568, "epoch": 360, "n_parameters": 303924416} {"train_lr": 0.0026537127547496104, "train_min_lr": 0.0026537127547496104, "train_loss": 0.2788308225661659, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006764655913381527, "epoch": 361, "n_parameters": 303924416} {"train_lr": 0.0026518200081646626, "train_min_lr": 0.0026518200081646626, "train_loss": 0.27883930556559694, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006776091156121439, "epoch": 362, "n_parameters": 303924416} {"train_lr": 0.0026499227844403155, "train_min_lr": 0.0026499227844403155, "train_loss": 0.27879598066628647, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0067630826498572836, "epoch": 363, "n_parameters": 303924416} {"train_lr": 0.002648021090983251, "train_min_lr": 0.002648021090983251, "train_loss": 0.2787630823291599, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006722956730160289, "epoch": 364, "n_parameters": 303924416} {"train_lr": 0.0026461149352176097, "train_min_lr": 0.0026461149352176097, "train_loss": 0.27874550962438566, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006752253695128438, "epoch": 365, "n_parameters": 303924416} {"train_lr": 0.00264420432458494, "train_min_lr": 0.00264420432458494, "train_loss": 0.2787875432509165, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006786942765570412, "epoch": 366, "n_parameters": 303924416} {"train_lr": 0.0026422892665441985, "train_min_lr": 0.0026422892665441985, "train_loss": 0.2787529089464209, "train_loss_scale": 1021689.4358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006750963808885083, "epoch": 367, "n_parameters": 303924416} {"train_lr": 0.002640369768571687, "train_min_lr": 0.002640369768571687, "train_loss": 0.2787965264720603, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006811141461814539, "epoch": 368, "n_parameters": 303924416} {"train_lr": 0.0026384458381610567, "train_min_lr": 0.0026384458381610567, "train_loss": 0.27883150168539333, "train_loss_scale": 613349.7435897436, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 369, "n_parameters": 303924416} {"train_lr": 0.002636517482823248, "train_min_lr": 0.002636517482823248, "train_loss": 0.27878117041053396, "train_loss_scale": 475556.10256410256, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 370, "n_parameters": 303924416} {"train_lr": 0.0026345847100864854, "train_min_lr": 0.0026345847100864854, "train_loss": 0.2787338576685542, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0067629090626724064, "epoch": 371, "n_parameters": 303924416} {"train_lr": 0.002632647527496237, "train_min_lr": 0.002632647527496237, "train_loss": 0.2787267714141844, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006770760238839266, "epoch": 372, "n_parameters": 303924416} {"train_lr": 0.0026307059426151857, "train_min_lr": 0.0026307059426151857, "train_loss": 0.2787692475025184, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006799443789089146, "epoch": 373, "n_parameters": 303924416} {"train_lr": 0.002628759963023199, "train_min_lr": 0.002628759963023199, "train_loss": 0.27886796494325, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007044086264123997, "epoch": 374, "n_parameters": 303924416} {"train_lr": 0.0026268095963173075, "train_min_lr": 0.0026268095963173075, "train_loss": 0.27884356886482775, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006923301040959091, "epoch": 375, "n_parameters": 303924416} {"train_lr": 0.0026248548501116606, "train_min_lr": 0.0026248548501116606, "train_loss": 0.27873545678821987, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006797343416390224, "epoch": 376, "n_parameters": 303924416} {"train_lr": 0.002622895732037515, "train_min_lr": 0.002622895732037515, "train_loss": 0.2786659277611388, "train_loss_scale": 465473.641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006828137658512554, "epoch": 377, "n_parameters": 303924416} {"train_lr": 0.0026209322497431787, "train_min_lr": 0.0026209322497431787, "train_loss": 0.2786712449055929, "train_loss_scale": 265504.8205128205, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 378, "n_parameters": 303924416} {"train_lr": 0.0026189644108940142, "train_min_lr": 0.0026189644108940142, "train_loss": 0.2786626470490144, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006797221341492752, "epoch": 379, "n_parameters": 303924416} {"train_lr": 0.002616992223172395, "train_min_lr": 0.002616992223172395, "train_loss": 0.2786806372310727, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006871328907063565, "epoch": 380, "n_parameters": 303924416} {"train_lr": 0.00261501569427765, "train_min_lr": 0.00261501569427765, "train_loss": 0.27870182283162975, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00686256727651478, "epoch": 381, "n_parameters": 303924416} {"train_lr": 0.002613034831926069, "train_min_lr": 0.002613034831926069, "train_loss": 0.27868677325582564, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006899000727571547, "epoch": 382, "n_parameters": 303924416} {"train_lr": 0.002611049643850867, "train_min_lr": 0.002611049643850867, "train_loss": 0.27860655343960017, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006799028963984874, "epoch": 383, "n_parameters": 303924416} {"train_lr": 0.0026090601378021354, "train_min_lr": 0.0026090601378021354, "train_loss": 0.278605520271529, "train_loss_scale": 413380.92307692306, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0068300806153088044, "epoch": 384, "n_parameters": 303924416} {"train_lr": 0.0026070663215468225, "train_min_lr": 0.0026070663215468225, "train_loss": 0.27857126807793975, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00681456323647394, "epoch": 385, "n_parameters": 303924416} {"train_lr": 0.002605068202868711, "train_min_lr": 0.002605068202868711, "train_loss": 0.278646065811746, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006864096739520438, "epoch": 386, "n_parameters": 303924416} {"train_lr": 0.0026030657895683754, "train_min_lr": 0.0026030657895683754, "train_loss": 0.27855162736243355, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006816566151638444, "epoch": 387, "n_parameters": 303924416} {"train_lr": 0.002601059089463159, "train_min_lr": 0.002601059089463159, "train_loss": 0.27860343877452975, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006834981474989595, "epoch": 388, "n_parameters": 303924416} {"train_lr": 0.0025990481103871357, "train_min_lr": 0.0025990481103871357, "train_loss": 0.2785049468362465, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006813369180935507, "epoch": 389, "n_parameters": 303924416} {"train_lr": 0.0025970328601910876, "train_min_lr": 0.0025970328601910876, "train_loss": 0.2784780459209847, "train_loss_scale": 313396.5128205128, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 390, "n_parameters": 303924416} {"train_lr": 0.002595013346742474, "train_min_lr": 0.002595013346742474, "train_loss": 0.2784942990103259, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006819224337306924, "epoch": 391, "n_parameters": 303924416} {"train_lr": 0.002592989577925391, "train_min_lr": 0.002592989577925391, "train_loss": 0.2785894958457599, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00691046511891704, "epoch": 392, "n_parameters": 303924416} {"train_lr": 0.0025909615616405536, "train_min_lr": 0.0025909615616405536, "train_loss": 0.2785853365680967, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006898608142271256, "epoch": 393, "n_parameters": 303924416} {"train_lr": 0.0025889293058052524, "train_min_lr": 0.0025889293058052524, "train_loss": 0.2784702139906585, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006837952774591171, "epoch": 394, "n_parameters": 303924416} {"train_lr": 0.002586892818353339, "train_min_lr": 0.002586892818353339, "train_loss": 0.2784423778305212, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006786332905996973, "epoch": 395, "n_parameters": 303924416} {"train_lr": 0.0025848521072351733, "train_min_lr": 0.0025848521072351733, "train_loss": 0.2784211130018752, "train_loss_scale": 365489.23076923075, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006823570210820971, "epoch": 396, "n_parameters": 303924416} {"train_lr": 0.0025828071804176074, "train_min_lr": 0.0025828071804176074, "train_loss": 0.27844394064353156, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006831272115680174, "epoch": 397, "n_parameters": 303924416} {"train_lr": 0.0025807580458839627, "train_min_lr": 0.0025807580458839627, "train_loss": 0.2783816101638457, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006839344683939066, "epoch": 398, "n_parameters": 303924416} {"train_lr": 0.0025787047116339716, "train_min_lr": 0.0025787047116339716, "train_loss": 0.2784353837949964, "train_loss_scale": 363808.8205128205, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 399, "n_parameters": 303924416} {"train_lr": 0.0025766471856837757, "train_min_lr": 0.0025766471856837757, "train_loss": 0.27847270989038336, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0068554984232506305, "epoch": 400, "n_parameters": 303924416} {"train_lr": 0.0025745854760658637, "train_min_lr": 0.0025745854760658637, "train_loss": 0.27842757019262093, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00688940505288207, "epoch": 401, "n_parameters": 303924416} {"train_lr": 0.002572519590829076, "train_min_lr": 0.002572519590829076, "train_loss": 0.27845268078351343, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0069432943262374746, "epoch": 402, "n_parameters": 303924416} {"train_lr": 0.002570449538038541, "train_min_lr": 0.002570449538038541, "train_loss": 0.2784199077928534, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006962288888947417, "epoch": 403, "n_parameters": 303924416} {"train_lr": 0.0025683753257756624, "train_min_lr": 0.0025683753257756624, "train_loss": 0.27833030296441835, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006822260480242757, "epoch": 404, "n_parameters": 303924416} {"train_lr": 0.0025662969621380854, "train_min_lr": 0.0025662969621380854, "train_loss": 0.27832133711900753, "train_loss_scale": 315076.92307692306, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0068335319102073135, "epoch": 405, "n_parameters": 303924416} {"train_lr": 0.0025642144552396537, "train_min_lr": 0.0025642144552396537, "train_loss": 0.278329324350358, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006830119401013526, "epoch": 406, "n_parameters": 303924416} {"train_lr": 0.0025621278132103966, "train_min_lr": 0.0025621278132103966, "train_loss": 0.27830143474066293, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006844380879416489, "epoch": 407, "n_parameters": 303924416} {"train_lr": 0.002560037044196476, "train_min_lr": 0.002560037044196476, "train_loss": 0.27830232919540066, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0068923970938134845, "epoch": 408, "n_parameters": 303924416} {"train_lr": 0.0025579421563601715, "train_min_lr": 0.0025579421563601715, "train_loss": 0.27830733652883327, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006882251046287517, "epoch": 409, "n_parameters": 303924416} {"train_lr": 0.002555843157879843, "train_min_lr": 0.002555843157879843, "train_loss": 0.27830739683794004, "train_loss_scale": 373891.28205128206, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 410, "n_parameters": 303924416} {"train_lr": 0.0025537400569498967, "train_min_lr": 0.0025537400569498967, "train_loss": 0.27828014969157105, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006866715891728512, "epoch": 411, "n_parameters": 303924416} {"train_lr": 0.002551632861780751, "train_min_lr": 0.002551632861780751, "train_loss": 0.2782981796399093, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006863875159372886, "epoch": 412, "n_parameters": 303924416} {"train_lr": 0.002549521580598816, "train_min_lr": 0.002549521580598816, "train_loss": 0.27825687918812037, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006845012296719524, "epoch": 413, "n_parameters": 303924416} {"train_lr": 0.002547406221646444, "train_min_lr": 0.002547406221646444, "train_loss": 0.27823669905774295, "train_loss_scale": 136533.33333333334, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 414, "n_parameters": 303924416} {"train_lr": 0.002545286793181916, "train_min_lr": 0.002545286793181916, "train_loss": 0.2783207106224906, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007025270161624902, "epoch": 415, "n_parameters": 303924416} {"train_lr": 0.0025431633034793937, "train_min_lr": 0.0025431633034793937, "train_loss": 0.2782585841651337, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006934946565590321, "epoch": 416, "n_parameters": 303924416} {"train_lr": 0.002541035760828894, "train_min_lr": 0.002541035760828894, "train_loss": 0.27826540075385797, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0069342095909329755, "epoch": 417, "n_parameters": 303924416} {"train_lr": 0.0025389041735362663, "train_min_lr": 0.0025389041735362663, "train_loss": 0.2783433231017672, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006946904165371775, "epoch": 418, "n_parameters": 303924416} {"train_lr": 0.0025367685499231326, "train_min_lr": 0.0025367685499231326, "train_loss": 0.2782877023296001, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006956674867512611, "epoch": 419, "n_parameters": 303924416} {"train_lr": 0.0025346288983268852, "train_min_lr": 0.0025346288983268852, "train_loss": 0.2782383044101059, "train_loss_scale": 202909.53846153847, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006857407219612446, "epoch": 420, "n_parameters": 303924416} {"train_lr": 0.0025324852271006383, "train_min_lr": 0.0025324852271006383, "train_loss": 0.27823383308565003, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006962239240797667, "epoch": 421, "n_parameters": 303924416} {"train_lr": 0.002530337544613201, "train_min_lr": 0.002530337544613201, "train_loss": 0.278161306026726, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006846417190853315, "epoch": 422, "n_parameters": 303924416} {"train_lr": 0.0025281858592490396, "train_min_lr": 0.0025281858592490396, "train_loss": 0.27815444894039476, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006890557794413792, "epoch": 423, "n_parameters": 303924416} {"train_lr": 0.0025260301794082433, "train_min_lr": 0.0025260301794082433, "train_loss": 0.27812226872162843, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006858887861315639, "epoch": 424, "n_parameters": 303924416} {"train_lr": 0.0025238705135065017, "train_min_lr": 0.0025238705135065017, "train_loss": 0.2781983613609694, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006940682703712717, "epoch": 425, "n_parameters": 303924416} {"train_lr": 0.002521706869975065, "train_min_lr": 0.002521706869975065, "train_loss": 0.2781696939536442, "train_loss_scale": 181904.41025641025, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 426, "n_parameters": 303924416} {"train_lr": 0.002519539257260711, "train_min_lr": 0.002519539257260711, "train_loss": 0.27812266909183025, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006902057889442987, "epoch": 427, "n_parameters": 303924416} {"train_lr": 0.0025173676838257156, "train_min_lr": 0.0025173676838257156, "train_loss": 0.27809614708157593, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006911775601717333, "epoch": 428, "n_parameters": 303924416} {"train_lr": 0.0025151921581478085, "train_min_lr": 0.0025151921581478085, "train_loss": 0.2781024097273938, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006924922531769157, "epoch": 429, "n_parameters": 303924416} {"train_lr": 0.0025130126887201593, "train_min_lr": 0.0025130126887201593, "train_loss": 0.27814877985451275, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006950748017212997, "epoch": 430, "n_parameters": 303924416} {"train_lr": 0.002510829284051327, "train_min_lr": 0.002510829284051327, "train_loss": 0.2780945922027175, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006889569446241531, "epoch": 431, "n_parameters": 303924416} {"train_lr": 0.002508641952665238, "train_min_lr": 0.002508641952665238, "train_loss": 0.2780176766622716, "train_loss_scale": 157538.46153846153, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006905243055930791, "epoch": 432, "n_parameters": 303924416} {"train_lr": 0.0025064507031011445, "train_min_lr": 0.0025064507031011445, "train_loss": 0.27815913166015005, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006958633198593862, "epoch": 433, "n_parameters": 303924416} {"train_lr": 0.0025042555439135986, "train_min_lr": 0.0025042555439135986, "train_loss": 0.27804604393000215, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006901485440977968, "epoch": 434, "n_parameters": 303924416} {"train_lr": 0.002502056483672411, "train_min_lr": 0.002502056483672411, "train_loss": 0.2781030018037806, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006953260358363294, "epoch": 435, "n_parameters": 303924416} {"train_lr": 0.0024998535309626348, "train_min_lr": 0.0024998535309626348, "train_loss": 0.27808043541303146, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006940307744886153, "epoch": 436, "n_parameters": 303924416} {"train_lr": 0.0024976466943844944, "train_min_lr": 0.0024976466943844944, "train_loss": 0.2780650560803807, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006942754862113641, "epoch": 437, "n_parameters": 303924416} {"train_lr": 0.0024954359825533996, "train_min_lr": 0.0024954359825533996, "train_loss": 0.27816604720189786, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007075441301239129, "epoch": 438, "n_parameters": 303924416} {"train_lr": 0.0024932214040998806, "train_min_lr": 0.0024932214040998806, "train_loss": 0.27806536618094796, "train_loss_scale": 469674.6666666667, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006912668147542251, "epoch": 439, "n_parameters": 303924416} {"train_lr": 0.0024910029676695655, "train_min_lr": 0.0024910029676695655, "train_loss": 0.2780733123505249, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006945360466861763, "epoch": 440, "n_parameters": 303924416} {"train_lr": 0.002488780681923135, "train_min_lr": 0.002488780681923135, "train_loss": 0.2779939812040912, "train_loss_scale": 280628.5128205128, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 441, "n_parameters": 303924416} {"train_lr": 0.0024865545555363086, "train_min_lr": 0.0024865545555363086, "train_loss": 0.27800397551917017, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006936197124349956, "epoch": 442, "n_parameters": 303924416} {"train_lr": 0.002484324597199788, "train_min_lr": 0.002484324597199788, "train_loss": 0.27795833135799813, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006924860952541423, "epoch": 443, "n_parameters": 303924416} {"train_lr": 0.002482090815619252, "train_min_lr": 0.002482090815619252, "train_loss": 0.2779283973071963, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006920237635942892, "epoch": 444, "n_parameters": 303924416} {"train_lr": 0.002479853219515286, "train_min_lr": 0.002479853219515286, "train_loss": 0.27793956749165094, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006984618917788164, "epoch": 445, "n_parameters": 303924416} {"train_lr": 0.0024776118176233776, "train_min_lr": 0.0024776118176233776, "train_loss": 0.27796280396302253, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006968342599112731, "epoch": 446, "n_parameters": 303924416} {"train_lr": 0.002475366618693875, "train_min_lr": 0.002475366618693875, "train_loss": 0.27793005312410873, "train_loss_scale": 340283.07692307694, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 447, "n_parameters": 303924416} {"train_lr": 0.0024731176314919346, "train_min_lr": 0.0024731176314919346, "train_loss": 0.2779635777816368, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006987505708821118, "epoch": 448, "n_parameters": 303924416} {"train_lr": 0.0024708648647975245, "train_min_lr": 0.0024708648647975245, "train_loss": 0.27791269205343455, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0069425930000411775, "epoch": 449, "n_parameters": 303924416} {"train_lr": 0.0024686083274053426, "train_min_lr": 0.0024686083274053426, "train_loss": 0.2779801692664384, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007057074600687394, "epoch": 450, "n_parameters": 303924416} {"train_lr": 0.0024663480281248338, "train_min_lr": 0.0024663480281248338, "train_loss": 0.2779168480834088, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0069061450448293146, "epoch": 451, "n_parameters": 303924416} {"train_lr": 0.002464083975780106, "train_min_lr": 0.002464083975780106, "train_loss": 0.27784676317913604, "train_loss_scale": 148716.3076923077, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 452, "n_parameters": 303924416} {"train_lr": 0.0024618161792099333, "train_min_lr": 0.0024618161792099333, "train_loss": 0.2778747101427796, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006962826748629316, "epoch": 453, "n_parameters": 303924416} {"train_lr": 0.002459544647267703, "train_min_lr": 0.002459544647267703, "train_loss": 0.27782421145074737, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006951222857400679, "epoch": 454, "n_parameters": 303924416} {"train_lr": 0.0024572693888213837, "train_min_lr": 0.0024572693888213837, "train_loss": 0.2777998831558925, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006924458080902696, "epoch": 455, "n_parameters": 303924416} {"train_lr": 0.002454990412753494, "train_min_lr": 0.002454990412753494, "train_loss": 0.2778104829201952, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006983923340717761, "epoch": 456, "n_parameters": 303924416} {"train_lr": 0.002452707727961063, "train_min_lr": 0.002452707727961063, "train_loss": 0.27777493660612845, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006939430639016418, "epoch": 457, "n_parameters": 303924416} {"train_lr": 0.002450421343355605, "train_min_lr": 0.002450421343355605, "train_loss": 0.27780275689497685, "train_loss_scale": 190726.5641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007027060427786544, "epoch": 458, "n_parameters": 303924416} {"train_lr": 0.0024481312678630704, "train_min_lr": 0.0024481312678630704, "train_loss": 0.27784722222266, "train_loss_scale": 131492.10256410256, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 459, "n_parameters": 303924416} {"train_lr": 0.0024458375104238228, "train_min_lr": 0.0024458375104238228, "train_loss": 0.27776961402597433, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006958484727268417, "epoch": 460, "n_parameters": 303924416} {"train_lr": 0.002443540079992599, "train_min_lr": 0.002443540079992599, "train_loss": 0.2777314460090099, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0069228352692264775, "epoch": 461, "n_parameters": 303924416} {"train_lr": 0.0024412389855384803, "train_min_lr": 0.0024412389855384803, "train_loss": 0.2777422840127913, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006929832621095464, "epoch": 462, "n_parameters": 303924416} {"train_lr": 0.002438934236044838, "train_min_lr": 0.002438934236044838, "train_loss": 0.27769928763453394, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006955537580861113, "epoch": 463, "n_parameters": 303924416} {"train_lr": 0.002436625840509331, "train_min_lr": 0.002436625840509331, "train_loss": 0.27768093802166194, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0069728782504964145, "epoch": 464, "n_parameters": 303924416} {"train_lr": 0.002434313807943839, "train_min_lr": 0.002434313807943839, "train_loss": 0.27769288707536477, "train_loss_scale": 207950.76923076922, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006953831309846674, "epoch": 465, "n_parameters": 303924416} {"train_lr": 0.002431998147374442, "train_min_lr": 0.002431998147374442, "train_loss": 0.27768266941599834, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006957407767633692, "epoch": 466, "n_parameters": 303924416} {"train_lr": 0.0024296788678413927, "train_min_lr": 0.0024296788678413927, "train_loss": 0.27768643492032796, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007018180870009252, "epoch": 467, "n_parameters": 303924416} {"train_lr": 0.0024273559783990592, "train_min_lr": 0.0024273559783990592, "train_loss": 0.2777059785872459, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0070236980547913564, "epoch": 468, "n_parameters": 303924416} {"train_lr": 0.0024250294881159144, "train_min_lr": 0.0024250294881159144, "train_loss": 0.27768588949663514, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006999300430134798, "epoch": 469, "n_parameters": 303924416} {"train_lr": 0.0024226994060744782, "train_min_lr": 0.0024226994060744782, "train_loss": 0.2776958274189383, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007016294886573003, "epoch": 470, "n_parameters": 303924416} {"train_lr": 0.0024203657413713023, "train_min_lr": 0.0024203657413713023, "train_loss": 0.277729620739149, "train_loss_scale": 308355.28205128206, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00707059953128919, "epoch": 471, "n_parameters": 303924416} {"train_lr": 0.002418028503116915, "train_min_lr": 0.002418028503116915, "train_loss": 0.277731355981161, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00703099108963775, "epoch": 472, "n_parameters": 303924416} {"train_lr": 0.0024156877004358054, "train_min_lr": 0.0024156877004358054, "train_loss": 0.2777442312829244, "train_loss_scale": 397417.0256410256, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 473, "n_parameters": 303924416} {"train_lr": 0.0024133433424663683, "train_min_lr": 0.0024133433424663683, "train_loss": 0.27763521449210554, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007004384664842525, "epoch": 474, "n_parameters": 303924416} {"train_lr": 0.002410995438360891, "train_min_lr": 0.002410995438360891, "train_loss": 0.2777327028079293, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007141259351136306, "epoch": 475, "n_parameters": 303924416} {"train_lr": 0.0024086439972854892, "train_min_lr": 0.0024086439972854892, "train_loss": 0.2775858156890489, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006982171834888271, "epoch": 476, "n_parameters": 303924416} {"train_lr": 0.002406289028420101, "train_min_lr": 0.002406289028420101, "train_loss": 0.2776187835141825, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007008921783357763, "epoch": 477, "n_parameters": 303924416} {"train_lr": 0.002403930540958429, "train_min_lr": 0.002403930540958429, "train_loss": 0.2776072565675116, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007067419496030571, "epoch": 478, "n_parameters": 303924416} {"train_lr": 0.0024015685441079117, "train_min_lr": 0.0024015685441079117, "train_loss": 0.2776034252049449, "train_loss_scale": 281468.71794871794, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007038961801952563, "epoch": 479, "n_parameters": 303924416} {"train_lr": 0.002399203047089689, "train_min_lr": 0.002399203047089689, "train_loss": 0.27752924864240086, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006977218022653595, "epoch": 480, "n_parameters": 303924416} {"train_lr": 0.0023968340591385717, "train_min_lr": 0.0023968340591385717, "train_loss": 0.27753493732187706, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007012917031832517, "epoch": 481, "n_parameters": 303924416} {"train_lr": 0.002394461589502989, "train_min_lr": 0.002394461589502989, "train_loss": 0.27762989909686625, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007071535762709876, "epoch": 482, "n_parameters": 303924416} {"train_lr": 0.0023920856474449704, "train_min_lr": 0.0023920856474449704, "train_loss": 0.2775810913911137, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007078079951521105, "epoch": 483, "n_parameters": 303924416} {"train_lr": 0.0023897062422400975, "train_min_lr": 0.0023897062422400975, "train_loss": 0.2774908452008206, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.006994958928165336, "epoch": 484, "n_parameters": 303924416} {"train_lr": 0.0023873233831774756, "train_min_lr": 0.0023873233831774756, "train_loss": 0.27749969550850206, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007002327998634428, "epoch": 485, "n_parameters": 303924416} {"train_lr": 0.0023849370795596837, "train_min_lr": 0.0023849370795596837, "train_loss": 0.2774348381715707, "train_loss_scale": 720896.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 486, "n_parameters": 303924416} {"train_lr": 0.0023825473407027647, "train_min_lr": 0.0023825473407027647, "train_loss": 0.27745857890038633, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0070460343536419365, "epoch": 487, "n_parameters": 303924416} {"train_lr": 0.0023801541759361604, "train_min_lr": 0.0023801541759361604, "train_loss": 0.27749443421952236, "train_loss_scale": 435226.25641025644, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 488, "n_parameters": 303924416} {"train_lr": 0.002377757594602684, "train_min_lr": 0.002377757594602684, "train_loss": 0.27742772981022984, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007085084842028431, "epoch": 489, "n_parameters": 303924416} {"train_lr": 0.0023753576060585027, "train_min_lr": 0.0023753576060585027, "train_loss": 0.2774800274234552, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007076859792085508, "epoch": 490, "n_parameters": 303924416} {"train_lr": 0.0023729542196730697, "train_min_lr": 0.0023729542196730697, "train_loss": 0.27741412875744015, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00704689982544201, "epoch": 491, "n_parameters": 303924416} {"train_lr": 0.0023705474448291095, "train_min_lr": 0.0023705474448291095, "train_loss": 0.2774033464807778, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007041218872659673, "epoch": 492, "n_parameters": 303924416} {"train_lr": 0.002368137290922579, "train_min_lr": 0.002368137290922579, "train_loss": 0.27740950466026193, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007040947675704956, "epoch": 493, "n_parameters": 303924416} {"train_lr": 0.002365723767362617, "train_min_lr": 0.002365723767362617, "train_loss": 0.2774104702238662, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007037724832442995, "epoch": 494, "n_parameters": 303924416} {"train_lr": 0.0023633068835715253, "train_min_lr": 0.0023633068835715253, "train_loss": 0.2774263331009887, "train_loss_scale": 505803.4871794872, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007113145946417577, "epoch": 495, "n_parameters": 303924416} {"train_lr": 0.0023608866489847244, "train_min_lr": 0.0023608866489847244, "train_loss": 0.27739622647491974, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007074452144129632, "epoch": 496, "n_parameters": 303924416} {"train_lr": 0.0023584630730507047, "train_min_lr": 0.0023584630730507047, "train_loss": 0.277428990853831, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007090134226764815, "epoch": 497, "n_parameters": 303924416} {"train_lr": 0.00235603616523102, "train_min_lr": 0.00235603616523102, "train_loss": 0.27737312734377784, "train_loss_scale": 262984.2051282051, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 498, "n_parameters": 303924416} {"train_lr": 0.002353605935000214, "train_min_lr": 0.002353605935000214, "train_loss": 0.27736721000968456, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0070643113815607745, "epoch": 499, "n_parameters": 303924416} {"train_lr": 0.0023511723918458135, "train_min_lr": 0.0023511723918458135, "train_loss": 0.2773260087586748, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007034528236358593, "epoch": 500, "n_parameters": 303924416} {"train_lr": 0.0023487355452682713, "train_min_lr": 0.0023487355452682713, "train_loss": 0.277351524012211, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007084863293223465, "epoch": 501, "n_parameters": 303924416} {"train_lr": 0.002346295404780935, "train_min_lr": 0.002346295404780935, "train_loss": 0.2773594883664583, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007080237233402351, "epoch": 502, "n_parameters": 303924416} {"train_lr": 0.002343851979910019, "train_min_lr": 0.002343851979910019, "train_loss": 0.2773947828323938, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007135116272808936, "epoch": 503, "n_parameters": 303924416} {"train_lr": 0.002341405280194559, "train_min_lr": 0.002341405280194559, "train_loss": 0.27731530094602835, "train_loss_scale": 314236.71794871794, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 504, "n_parameters": 303924416} {"train_lr": 0.0023389553151863694, "train_min_lr": 0.0023389553151863694, "train_loss": 0.2772837113857699, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007021652864447485, "epoch": 505, "n_parameters": 303924416} {"train_lr": 0.0023365020944500123, "train_min_lr": 0.0023365020944500123, "train_loss": 0.27736378316051113, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007216586911967263, "epoch": 506, "n_parameters": 303924416} {"train_lr": 0.0023340456275627703, "train_min_lr": 0.0023340456275627703, "train_loss": 0.2772590012008993, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007100416090673743, "epoch": 507, "n_parameters": 303924416} {"train_lr": 0.0023315859241145867, "train_min_lr": 0.0023315859241145867, "train_loss": 0.27732804339701456, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007138584843442704, "epoch": 508, "n_parameters": 303924416} {"train_lr": 0.002329122993708048, "train_min_lr": 0.002329122993708048, "train_loss": 0.2772839594638357, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007113816440463639, "epoch": 509, "n_parameters": 303924416} {"train_lr": 0.0023266568459583315, "train_min_lr": 0.0023266568459583315, "train_loss": 0.2772933120528857, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007146216763290935, "epoch": 510, "n_parameters": 303924416} {"train_lr": 0.002324187490493184, "train_min_lr": 0.002324187490493184, "train_loss": 0.2772482192018427, "train_loss_scale": 518406.5641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007101553340012638, "epoch": 511, "n_parameters": 303924416} {"train_lr": 0.0023217149369528702, "train_min_lr": 0.0023217149369528702, "train_loss": 0.27724470919929445, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007175833106231995, "epoch": 512, "n_parameters": 303924416} {"train_lr": 0.0023192391949901393, "train_min_lr": 0.0023192391949901393, "train_loss": 0.27731250283212805, "train_loss_scale": 376411.89743589744, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 513, "n_parameters": 303924416} {"train_lr": 0.002316760274270188, "train_min_lr": 0.002316760274270188, "train_loss": 0.27731449709823114, "train_loss_scale": 183584.8205128205, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 514, "n_parameters": 303924416} {"train_lr": 0.002314278184470623, "train_min_lr": 0.002314278184470623, "train_loss": 0.2771757293958217, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00710401243607824, "epoch": 515, "n_parameters": 303924416} {"train_lr": 0.0023117929352814244, "train_min_lr": 0.0023117929352814244, "train_loss": 0.27734488658965206, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007292196923234046, "epoch": 516, "n_parameters": 303924416} {"train_lr": 0.0023093045364049047, "train_min_lr": 0.0023093045364049047, "train_loss": 0.2772485053954789, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007076983995293864, "epoch": 517, "n_parameters": 303924416} {"train_lr": 0.0023068129975556753, "train_min_lr": 0.0023068129975556753, "train_loss": 0.2771834844621854, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0071188672997941, "epoch": 518, "n_parameters": 303924416} {"train_lr": 0.002304318328460604, "train_min_lr": 0.002304318328460604, "train_loss": 0.27710472755969906, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007060960473982283, "epoch": 519, "n_parameters": 303924416} {"train_lr": 0.002301820538858778, "train_min_lr": 0.002301820538858778, "train_loss": 0.27713523806335455, "train_loss_scale": 155858.05128205128, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0071026572930769855, "epoch": 520, "n_parameters": 303924416} {"train_lr": 0.002299319638501468, "train_min_lr": 0.002299319638501468, "train_loss": 0.2771298959134863, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007104750124152559, "epoch": 521, "n_parameters": 303924416} {"train_lr": 0.0022968156371520896, "train_min_lr": 0.0022968156371520896, "train_loss": 0.27712967943173283, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007121565500990703, "epoch": 522, "n_parameters": 303924416} {"train_lr": 0.0022943085445861614, "train_min_lr": 0.0022943085445861614, "train_loss": 0.27707310567777127, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007079453694108778, "epoch": 523, "n_parameters": 303924416} {"train_lr": 0.0022917983705912763, "train_min_lr": 0.0022917983705912763, "train_loss": 0.2770657054273985, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0071479205149584096, "epoch": 524, "n_parameters": 303924416} {"train_lr": 0.0022892851249670496, "train_min_lr": 0.0022892851249670496, "train_loss": 0.2770045479693904, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0070756341447122395, "epoch": 525, "n_parameters": 303924416} {"train_lr": 0.0022867688175250956, "train_min_lr": 0.0022867688175250956, "train_loss": 0.27705911460977334, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007188384354007072, "epoch": 526, "n_parameters": 303924416} {"train_lr": 0.0022842494580889716, "train_min_lr": 0.0022842494580889716, "train_loss": 0.2770619211025918, "train_loss_scale": 466313.8461538461, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00711146656734248, "epoch": 527, "n_parameters": 303924416} {"train_lr": 0.0022817270564941617, "train_min_lr": 0.0022817270564941617, "train_loss": 0.27698375028259575, "train_loss_scale": 345324.3076923077, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 528, "n_parameters": 303924416} {"train_lr": 0.002279201622588018, "train_min_lr": 0.002279201622588018, "train_loss": 0.2770357350699412, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007131994310885859, "epoch": 529, "n_parameters": 303924416} {"train_lr": 0.0022766731662297347, "train_min_lr": 0.0022766731662297347, "train_loss": 0.2769604182849901, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0071020629585314635, "epoch": 530, "n_parameters": 303924416} {"train_lr": 0.002274141697290305, "train_min_lr": 0.002274141697290305, "train_loss": 0.2770919900638266, "train_loss_scale": 234417.23076923078, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 531, "n_parameters": 303924416} {"train_lr": 0.0022716072256524756, "train_min_lr": 0.0022716072256524756, "train_loss": 0.2770584477260948, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0071857600929573755, "epoch": 532, "n_parameters": 303924416} {"train_lr": 0.002269069761210729, "train_min_lr": 0.002269069761210729, "train_loss": 0.27705375554087835, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007201057887122704, "epoch": 533, "n_parameters": 303924416} {"train_lr": 0.0022665293138712287, "train_min_lr": 0.0022665293138712287, "train_loss": 0.27708586162397975, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0072096671135976715, "epoch": 534, "n_parameters": 303924416} {"train_lr": 0.002263985893551773, "train_min_lr": 0.002263985893551773, "train_loss": 0.2770363359132973, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007288365371119327, "epoch": 535, "n_parameters": 303924416} {"train_lr": 0.002261439510181771, "train_min_lr": 0.002261439510181771, "train_loss": 0.27704103267453134, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007188507993048869, "epoch": 536, "n_parameters": 303924416} {"train_lr": 0.002258890173702205, "train_min_lr": 0.002258890173702205, "train_loss": 0.27695584004350865, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007117551170850698, "epoch": 537, "n_parameters": 303924416} {"train_lr": 0.002256337894065582, "train_min_lr": 0.002256337894065582, "train_loss": 0.27696036628316134, "train_loss_scale": 236097.64102564103, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007156690084136641, "epoch": 538, "n_parameters": 303924416} {"train_lr": 0.0022537826812358995, "train_min_lr": 0.0022537826812358995, "train_loss": 0.2769349463439236, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007176259216159964, "epoch": 539, "n_parameters": 303924416} {"train_lr": 0.002251224545188606, "train_min_lr": 0.002251224545188606, "train_loss": 0.27689373802441436, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007126413914076506, "epoch": 540, "n_parameters": 303924416} {"train_lr": 0.0022486634959105628, "train_min_lr": 0.0022486634959105628, "train_loss": 0.2769183007707724, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007175477978307754, "epoch": 541, "n_parameters": 303924416} {"train_lr": 0.002246099543400005, "train_min_lr": 0.002246099543400005, "train_loss": 0.2768773574453707, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0071404313665623656, "epoch": 542, "n_parameters": 303924416} {"train_lr": 0.0022435326976664965, "train_min_lr": 0.0022435326976664965, "train_loss": 0.2767805052586855, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0071041557000567895, "epoch": 543, "n_parameters": 303924416} {"train_lr": 0.0022409629687309036, "train_min_lr": 0.0022409629687309036, "train_loss": 0.2767676587532967, "train_loss_scale": 364649.0256410256, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007128461416309269, "epoch": 544, "n_parameters": 303924416} {"train_lr": 0.0022383903666253436, "train_min_lr": 0.0022383903666253436, "train_loss": 0.2767485371217705, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0070801564635565644, "epoch": 545, "n_parameters": 303924416} {"train_lr": 0.002235814901393154, "train_min_lr": 0.002235814901393154, "train_loss": 0.27683508368985105, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007181331254123973, "epoch": 546, "n_parameters": 303924416} {"train_lr": 0.0022332365830888467, "train_min_lr": 0.0022332365830888467, "train_loss": 0.27687975706066936, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007263556806346736, "epoch": 547, "n_parameters": 303924416} {"train_lr": 0.0022306554217780723, "train_min_lr": 0.0022306554217780723, "train_loss": 0.2768355523659967, "train_loss_scale": 312556.3076923077, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 548, "n_parameters": 303924416} {"train_lr": 0.002228071427537582, "train_min_lr": 0.002228071427537582, "train_loss": 0.2768564131379557, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007301591393442299, "epoch": 549, "n_parameters": 303924416} {"train_lr": 0.0022254846104551853, "train_min_lr": 0.0022254846104551853, "train_loss": 0.2769114480992684, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007332015701999458, "epoch": 550, "n_parameters": 303924416} {"train_lr": 0.002222894980629715, "train_min_lr": 0.002222894980629715, "train_loss": 0.27688485918602407, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007212213089820953, "epoch": 551, "n_parameters": 303924416} {"train_lr": 0.0022203025481709825, "train_min_lr": 0.0022203025481709825, "train_loss": 0.2768182574520604, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007191895371458183, "epoch": 552, "n_parameters": 303924416} {"train_lr": 0.002217707323199736, "train_min_lr": 0.002217707323199736, "train_loss": 0.276817001369543, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007212071381031702, "epoch": 553, "n_parameters": 303924416} {"train_lr": 0.0022151093158476296, "train_min_lr": 0.0022151093158476296, "train_loss": 0.2767383301964937, "train_loss_scale": 366329.4358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007149804047702883, "epoch": 554, "n_parameters": 303924416} {"train_lr": 0.0022125085362571847, "train_min_lr": 0.0022125085362571847, "train_loss": 0.276716861826105, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00721080440771169, "epoch": 555, "n_parameters": 303924416} {"train_lr": 0.0022099049945817385, "train_min_lr": 0.0022099049945817385, "train_loss": 0.2767395442262149, "train_loss_scale": 354566.5641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 556, "n_parameters": 303924416} {"train_lr": 0.0022072987009854137, "train_min_lr": 0.0022072987009854137, "train_loss": 0.2767576451073043, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007241090534266849, "epoch": 557, "n_parameters": 303924416} {"train_lr": 0.0022046896656430785, "train_min_lr": 0.0022046896656430785, "train_loss": 0.2767830525483124, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007474724840647422, "epoch": 558, "n_parameters": 303924416} {"train_lr": 0.0022020778987402964, "train_min_lr": 0.0022020778987402964, "train_loss": 0.27691469161222, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007372604529313647, "epoch": 559, "n_parameters": 303924416} {"train_lr": 0.0021994634104733073, "train_min_lr": 0.0021994634104733073, "train_loss": 0.27677041167118704, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007223807763344107, "epoch": 560, "n_parameters": 303924416} {"train_lr": 0.0021968462110489636, "train_min_lr": 0.0021968462110489636, "train_loss": 0.2766980767214241, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00720308093019785, "epoch": 561, "n_parameters": 303924416} {"train_lr": 0.0021942263106847085, "train_min_lr": 0.0021942263106847085, "train_loss": 0.27690151081575703, "train_loss_scale": 324319.1794871795, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0074403303845499, "epoch": 562, "n_parameters": 303924416} {"train_lr": 0.00219160371960853, "train_min_lr": 0.00219160371960853, "train_loss": 0.276729937021931, "train_loss_scale": 270546.0512820513, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 563, "n_parameters": 303924416} {"train_lr": 0.0021889784480589184, "train_min_lr": 0.0021889784480589184, "train_loss": 0.2766654053344749, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0071991374283336485, "epoch": 564, "n_parameters": 303924416} {"train_lr": 0.002186350506284827, "train_min_lr": 0.002186350506284827, "train_loss": 0.27667226126262295, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007208786624221084, "epoch": 565, "n_parameters": 303924416} {"train_lr": 0.0021837199045456382, "train_min_lr": 0.0021837199045456382, "train_loss": 0.2766713918282244, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007287343456529272, "epoch": 566, "n_parameters": 303924416} {"train_lr": 0.002181086653111113, "train_min_lr": 0.002181086653111113, "train_loss": 0.2766215143761096, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007212042915993012, "epoch": 567, "n_parameters": 303924416} {"train_lr": 0.0021784507622613566, "train_min_lr": 0.0021784507622613566, "train_loss": 0.2766545675575542, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007221414762823723, "epoch": 568, "n_parameters": 303924416} {"train_lr": 0.0021758122422867888, "train_min_lr": 0.0021758122422867888, "train_loss": 0.2765952594703637, "train_loss_scale": 408339.6923076923, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007236578703249017, "epoch": 569, "n_parameters": 303924416} {"train_lr": 0.0021731711034880846, "train_min_lr": 0.0021731711034880846, "train_loss": 0.2766690291374779, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007276363326057505, "epoch": 570, "n_parameters": 303924416} {"train_lr": 0.002170527356176138, "train_min_lr": 0.002170527356176138, "train_loss": 0.27656254432916355, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007237369596110418, "epoch": 571, "n_parameters": 303924416} {"train_lr": 0.0021678810106720405, "train_min_lr": 0.0021678810106720405, "train_loss": 0.2765751142944329, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007237876874382774, "epoch": 572, "n_parameters": 303924416} {"train_lr": 0.0021652320773070076, "train_min_lr": 0.0021652320773070076, "train_loss": 0.27651527805779225, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0071993424207903445, "epoch": 573, "n_parameters": 303924416} {"train_lr": 0.0021625805664223837, "train_min_lr": 0.0021625805664223837, "train_loss": 0.27655438005398864, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007262863538180215, "epoch": 574, "n_parameters": 303924416} {"train_lr": 0.0021599264883695505, "train_min_lr": 0.0021599264883695505, "train_loss": 0.27658791435010827, "train_loss_scale": 598226.0512820513, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 575, "n_parameters": 303924416} {"train_lr": 0.002157269853509928, "train_min_lr": 0.002157269853509928, "train_loss": 0.2764733367545817, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0072063288012614996, "epoch": 576, "n_parameters": 303924416} {"train_lr": 0.0021546106722149095, "train_min_lr": 0.0021546106722149095, "train_loss": 0.27653518322711956, "train_loss_scale": 386494.358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 577, "n_parameters": 303924416} {"train_lr": 0.002151948954865835, "train_min_lr": 0.002151948954865835, "train_loss": 0.27649225830697477, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007271907450511861, "epoch": 578, "n_parameters": 303924416} {"train_lr": 0.0021492847118539373, "train_min_lr": 0.0021492847118539373, "train_loss": 0.2764590303980721, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007283929507069003, "epoch": 579, "n_parameters": 303924416} {"train_lr": 0.002146617953580322, "train_min_lr": 0.002146617953580322, "train_loss": 0.2764647039763916, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00725011305602936, "epoch": 580, "n_parameters": 303924416} {"train_lr": 0.0021439486904558996, "train_min_lr": 0.0021439486904558996, "train_loss": 0.2764588196773846, "train_loss_scale": 216772.92307692306, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 581, "n_parameters": 303924416} {"train_lr": 0.0021412769329013656, "train_min_lr": 0.0021412769329013656, "train_loss": 0.27646867358066046, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00728822308473098, "epoch": 582, "n_parameters": 303924416} {"train_lr": 0.0021386026913471664, "train_min_lr": 0.0021386026913471664, "train_loss": 0.2765175942450953, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00738846974220509, "epoch": 583, "n_parameters": 303924416} {"train_lr": 0.0021359259762334173, "train_min_lr": 0.0021359259762334173, "train_loss": 0.2765435954549899, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007435661245089693, "epoch": 584, "n_parameters": 303924416} {"train_lr": 0.0021332467980099226, "train_min_lr": 0.0021332467980099226, "train_loss": 0.2764169804017561, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007258924748151539, "epoch": 585, "n_parameters": 303924416} {"train_lr": 0.0021305651671360733, "train_min_lr": 0.0021305651671360733, "train_loss": 0.2763829672506127, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007261956592675489, "epoch": 586, "n_parameters": 303924416} {"train_lr": 0.0021278810940808597, "train_min_lr": 0.0021278810940808597, "train_loss": 0.27637446430834156, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007249284561019009, "epoch": 587, "n_parameters": 303924416} {"train_lr": 0.0021251945893227893, "train_min_lr": 0.0021251945893227893, "train_loss": 0.27637888193249893, "train_loss_scale": 253741.94871794872, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0072937611048706835, "epoch": 588, "n_parameters": 303924416} {"train_lr": 0.0021225056633498727, "train_min_lr": 0.0021225056633498727, "train_loss": 0.2763790916830588, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007272579237066496, "epoch": 589, "n_parameters": 303924416} {"train_lr": 0.002119814326659565, "train_min_lr": 0.002119814326659565, "train_loss": 0.2763720379533389, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007344750369576594, "epoch": 590, "n_parameters": 303924416} {"train_lr": 0.002117120589758743, "train_min_lr": 0.002117120589758743, "train_loss": 0.2764583387239, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007442098852688781, "epoch": 591, "n_parameters": 303924416} {"train_lr": 0.002114424463163643, "train_min_lr": 0.002114424463163643, "train_loss": 0.2764861652961908, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007450393698094651, "epoch": 592, "n_parameters": 303924416} {"train_lr": 0.0021117259573998353, "train_min_lr": 0.0021117259573998353, "train_loss": 0.27637926592396045, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007311948706419804, "epoch": 593, "n_parameters": 303924416} {"train_lr": 0.002109025083002179, "train_min_lr": 0.002109025083002179, "train_loss": 0.27632100710299057, "train_loss_scale": 399937.641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00732336603015518, "epoch": 594, "n_parameters": 303924416} {"train_lr": 0.0021063218505147814, "train_min_lr": 0.0021063218505147814, "train_loss": 0.2762841378959517, "train_loss_scale": 346164.5128205128, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 595, "n_parameters": 303924416} {"train_lr": 0.0021036162704909512, "train_min_lr": 0.0021036162704909512, "train_loss": 0.27632287023171115, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007345598820859614, "epoch": 596, "n_parameters": 303924416} {"train_lr": 0.0021009083534931686, "train_min_lr": 0.0021009083534931686, "train_loss": 0.2762852497667504, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0073076442036276255, "epoch": 597, "n_parameters": 303924416} {"train_lr": 0.0020981981100930327, "train_min_lr": 0.0020981981100930327, "train_loss": 0.27629971535852516, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007308231372660838, "epoch": 598, "n_parameters": 303924416} {"train_lr": 0.0020954855508712156, "train_min_lr": 0.0020954855508712156, "train_loss": 0.2762987959759835, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007307559851771937, "epoch": 599, "n_parameters": 303924416} {"train_lr": 0.002092770686417453, "train_min_lr": 0.002092770686417453, "train_loss": 0.27629267387032413, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0073973586758933, "epoch": 600, "n_parameters": 303924416} {"train_lr": 0.0020900535273304594, "train_min_lr": 0.0020900535273304594, "train_loss": 0.27634263013859683, "train_loss_scale": 278948.10256410256, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 601, "n_parameters": 303924416} {"train_lr": 0.0020873340842179158, "train_min_lr": 0.0020873340842179158, "train_loss": 0.27631456828497064, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00745980065011491, "epoch": 602, "n_parameters": 303924416} {"train_lr": 0.0020846123676964215, "train_min_lr": 0.0020846123676964215, "train_loss": 0.2762403334222304, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007323058511620053, "epoch": 603, "n_parameters": 303924416} {"train_lr": 0.002081888388391451, "train_min_lr": 0.002081888388391451, "train_loss": 0.2763598360306321, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007484259700271277, "epoch": 604, "n_parameters": 303924416} {"train_lr": 0.0020791621569373037, "train_min_lr": 0.0020791621569373037, "train_loss": 0.27622813506959343, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007254524338536729, "epoch": 605, "n_parameters": 303924416} {"train_lr": 0.0020764336839770837, "train_min_lr": 0.0020764336839770837, "train_loss": 0.276222557375709, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007311566711928791, "epoch": 606, "n_parameters": 303924416} {"train_lr": 0.0020737029801626366, "train_min_lr": 0.0020737029801626366, "train_loss": 0.2762081867131667, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00738603609184233, "epoch": 607, "n_parameters": 303924416} {"train_lr": 0.0020709700561545212, "train_min_lr": 0.0020709700561545212, "train_loss": 0.2762273810982991, "train_loss_scale": 470514.8717948718, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007397478446364403, "epoch": 608, "n_parameters": 303924416} {"train_lr": 0.0020682349226219697, "train_min_lr": 0.0020682349226219697, "train_loss": 0.27621545609779274, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00739657528585014, "epoch": 609, "n_parameters": 303924416} {"train_lr": 0.0020654975902428246, "train_min_lr": 0.0020654975902428246, "train_loss": 0.2761600248437996, "train_loss_scale": 476396.3076923077, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 610, "n_parameters": 303924416} {"train_lr": 0.0020627580697035255, "train_min_lr": 0.0020627580697035255, "train_loss": 0.27615018884460324, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007361210954303925, "epoch": 611, "n_parameters": 303924416} {"train_lr": 0.0020600163716990544, "train_min_lr": 0.0020600163716990544, "train_loss": 0.27613572175435436, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007389111793600023, "epoch": 612, "n_parameters": 303924416} {"train_lr": 0.002057272506932887, "train_min_lr": 0.002057272506932887, "train_loss": 0.2760947795304207, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007323383186490108, "epoch": 613, "n_parameters": 303924416} {"train_lr": 0.00205452648611696, "train_min_lr": 0.00205452648611696, "train_loss": 0.276095431838304, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007429174425152059, "epoch": 614, "n_parameters": 303924416} {"train_lr": 0.002051778319971633, "train_min_lr": 0.002051778319971633, "train_loss": 0.27610701203752214, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007431940503943807, "epoch": 615, "n_parameters": 303924416} {"train_lr": 0.0020490280192256333, "train_min_lr": 0.0020490280192256333, "train_loss": 0.27610244566741854, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007395117970493932, "epoch": 616, "n_parameters": 303924416} {"train_lr": 0.002046275594616027, "train_min_lr": 0.002046275594616027, "train_loss": 0.2760486242373307, "train_loss_scale": 464633.4358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007347420846016552, "epoch": 617, "n_parameters": 303924416} {"train_lr": 0.002043521056888168, "train_min_lr": 0.002043521056888168, "train_loss": 0.27605628261927706, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007426646294800612, "epoch": 618, "n_parameters": 303924416} {"train_lr": 0.002040764416795663, "train_min_lr": 0.002040764416795663, "train_loss": 0.27608956689409053, "train_loss_scale": 404978.8717948718, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 619, "n_parameters": 303924416} {"train_lr": 0.002038005685100323, "train_min_lr": 0.002038005685100323, "train_loss": 0.2760592621589939, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007419135026000918, "epoch": 620, "n_parameters": 303924416} {"train_lr": 0.0020352448725721264, "train_min_lr": 0.0020352448725721264, "train_loss": 0.27604405531803, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007464409155102494, "epoch": 621, "n_parameters": 303924416} {"train_lr": 0.0020324819899891758, "train_min_lr": 0.0020324819899891758, "train_loss": 0.2761117094882931, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007502029633495766, "epoch": 622, "n_parameters": 303924416} {"train_lr": 0.0020297170481376535, "train_min_lr": 0.0020297170481376535, "train_loss": 0.276086164957796, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007464833462085479, "epoch": 623, "n_parameters": 303924416} {"train_lr": 0.0020269500578117827, "train_min_lr": 0.0020269500578117827, "train_loss": 0.27615754559743577, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0075776709261565255, "epoch": 624, "n_parameters": 303924416} {"train_lr": 0.00202418102981378, "train_min_lr": 0.00202418102981378, "train_loss": 0.27605725074998844, "train_loss_scale": 273906.8717948718, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007377858406517846, "epoch": 625, "n_parameters": 303924416} {"train_lr": 0.002021409974953821, "train_min_lr": 0.002021409974953821, "train_loss": 0.2760450654119874, "train_loss_scale": 355406.76923076925, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 626, "n_parameters": 303924416} {"train_lr": 0.0020186369040499945, "train_min_lr": 0.0020186369040499945, "train_loss": 0.276051138047105, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007500062790663483, "epoch": 627, "n_parameters": 303924416} {"train_lr": 0.0020158618279282566, "train_min_lr": 0.0020158618279282566, "train_loss": 0.27599443282442504, "train_loss_scale": 186105.4358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 628, "n_parameters": 303924416} {"train_lr": 0.0020130847574223974, "train_min_lr": 0.0020130847574223974, "train_loss": 0.2759427225921685, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007376123397336461, "epoch": 629, "n_parameters": 303924416} {"train_lr": 0.0020103057033739845, "train_min_lr": 0.0020103057033739845, "train_loss": 0.2758605339570353, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00741799875574473, "epoch": 630, "n_parameters": 303924416} {"train_lr": 0.0020075246766323336, "train_min_lr": 0.0020075246766323336, "train_loss": 0.27594784585735166, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0074148279889367326, "epoch": 631, "n_parameters": 303924416} {"train_lr": 0.0020047416880544654, "train_min_lr": 0.0020047416880544654, "train_loss": 0.27593077571155167, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007445375482814434, "epoch": 632, "n_parameters": 303924416} {"train_lr": 0.0020019567485050536, "train_min_lr": 0.0020019567485050536, "train_loss": 0.27595904447997993, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007494786840815766, "epoch": 633, "n_parameters": 303924416} {"train_lr": 0.0019991698688563877, "train_min_lr": 0.0019991698688563877, "train_loss": 0.2759081479251528, "train_loss_scale": 153337.4358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007436227944428817, "epoch": 634, "n_parameters": 303924416} {"train_lr": 0.0019963810599883383, "train_min_lr": 0.0019963810599883383, "train_loss": 0.2758937225294992, "train_loss_scale": 177913.4358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 635, "n_parameters": 303924416} {"train_lr": 0.001993590332788305, "train_min_lr": 0.001993590332788305, "train_loss": 0.2759909318395866, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007574618137322175, "epoch": 636, "n_parameters": 303924416} {"train_lr": 0.001990797698151171, "train_min_lr": 0.001990797698151171, "train_loss": 0.2758222682885109, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007462203807125871, "epoch": 637, "n_parameters": 303924416} {"train_lr": 0.0019880031669792728, "train_min_lr": 0.0019880031669792728, "train_loss": 0.2759389639294778, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007557777058070478, "epoch": 638, "n_parameters": 303924416} {"train_lr": 0.0019852067501823466, "train_min_lr": 0.0019852067501823466, "train_loss": 0.27587412377126896, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007498351651100585, "epoch": 639, "n_parameters": 303924416} {"train_lr": 0.001982408458677493, "train_min_lr": 0.001982408458677493, "train_loss": 0.2757573332279347, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0073558890786117465, "epoch": 640, "n_parameters": 303924416} {"train_lr": 0.001979608303389129, "train_min_lr": 0.001979608303389129, "train_loss": 0.2757559221160288, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007447346355407857, "epoch": 641, "n_parameters": 303924416} {"train_lr": 0.001976806295248947, "train_min_lr": 0.001976806295248947, "train_loss": 0.27578215687893903, "train_loss_scale": 116998.56410256411, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0075287966711375005, "epoch": 642, "n_parameters": 303924416} {"train_lr": 0.001974002445195878, "train_min_lr": 0.001974002445195878, "train_loss": 0.27577406313652414, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007454888659338348, "epoch": 643, "n_parameters": 303924416} {"train_lr": 0.0019711967641760394, "train_min_lr": 0.0019711967641760394, "train_loss": 0.27581577820894426, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007534255057227057, "epoch": 644, "n_parameters": 303924416} {"train_lr": 0.001968389263142698, "train_min_lr": 0.001968389263142698, "train_loss": 0.27586380359179413, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007615071306458841, "epoch": 645, "n_parameters": 303924416} {"train_lr": 0.0019655799530562206, "train_min_lr": 0.0019655799530562206, "train_loss": 0.27578275312836736, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007525069813709706, "epoch": 646, "n_parameters": 303924416} {"train_lr": 0.0019627688448840505, "train_min_lr": 0.0019627688448840505, "train_loss": 0.2756830193460561, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007390490465928825, "epoch": 647, "n_parameters": 303924416} {"train_lr": 0.001959955949600632, "train_min_lr": 0.001959955949600632, "train_loss": 0.2759211812681781, "train_loss_scale": 180224.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007791875491444117, "epoch": 648, "n_parameters": 303924416} {"train_lr": 0.0019571412781874023, "train_min_lr": 0.0019571412781874023, "train_loss": 0.2759003708771884, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007644461400699443, "epoch": 649, "n_parameters": 303924416} {"train_lr": 0.001954324841632723, "train_min_lr": 0.001954324841632723, "train_loss": 0.2757310271472073, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007492340311634904, "epoch": 650, "n_parameters": 303924416} {"train_lr": 0.0019515066509318512, "train_min_lr": 0.0019515066509318512, "train_loss": 0.27586089040881073, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007754729508470075, "epoch": 651, "n_parameters": 303924416} {"train_lr": 0.0019486867170868882, "train_min_lr": 0.0019486867170868882, "train_loss": 0.27581866630591834, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007623233021989178, "epoch": 652, "n_parameters": 303924416} {"train_lr": 0.0019458650511067438, "train_min_lr": 0.0019458650511067438, "train_loss": 0.27572341619704205, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007528828865944002, "epoch": 653, "n_parameters": 303924416} {"train_lr": 0.0019430416640070901, "train_min_lr": 0.0019430416640070901, "train_loss": 0.2756762912240214, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007495547358233195, "epoch": 654, "n_parameters": 303924416} {"train_lr": 0.001940216566810318, "train_min_lr": 0.001940216566810318, "train_loss": 0.2756707564097805, "train_loss_scale": 289030.5641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 655, "n_parameters": 303924416} {"train_lr": 0.0019373897705454927, "train_min_lr": 0.0019373897705454927, "train_loss": 0.2756563044744186, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007563464127134723, "epoch": 656, "n_parameters": 303924416} {"train_lr": 0.0019345612862483096, "train_min_lr": 0.0019345612862483096, "train_loss": 0.27570014137129945, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007627538563564229, "epoch": 657, "n_parameters": 303924416} {"train_lr": 0.0019317311249610642, "train_min_lr": 0.0019317311249610642, "train_loss": 0.2757108925710408, "train_loss_scale": 149556.5128205128, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 658, "n_parameters": 303924416} {"train_lr": 0.0019288992977325938, "train_min_lr": 0.0019288992977325938, "train_loss": 0.27572759902642036, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007626407137976434, "epoch": 659, "n_parameters": 303924416} {"train_lr": 0.0019260658156182299, "train_min_lr": 0.0019260658156182299, "train_loss": 0.275622862803105, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007543374799215832, "epoch": 660, "n_parameters": 303924416} {"train_lr": 0.0019232306896797824, "train_min_lr": 0.0019232306896797824, "train_loss": 0.27562262382334435, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0076281295534080034, "epoch": 661, "n_parameters": 303924416} {"train_lr": 0.0019203939309854635, "train_min_lr": 0.0019203939309854635, "train_loss": 0.27555953161134267, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007524502036782603, "epoch": 662, "n_parameters": 303924416} {"train_lr": 0.0019175555506098707, "train_min_lr": 0.0019175555506098707, "train_loss": 0.2755393215234224, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007523116640722713, "epoch": 663, "n_parameters": 303924416} {"train_lr": 0.0019147155596339223, "train_min_lr": 0.0019147155596339223, "train_loss": 0.27550286984441275, "train_loss_scale": 131492.10256410256, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 664, "n_parameters": 303924416} {"train_lr": 0.0019118739691448333, "train_min_lr": 0.0019118739691448333, "train_loss": 0.27558225323147595, "train_loss_scale": 119729.23076923077, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 665, "n_parameters": 303924416} {"train_lr": 0.001909030790236056, "train_min_lr": 0.001909030790236056, "train_loss": 0.2755936752640618, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007608415014170206, "epoch": 666, "n_parameters": 303924416} {"train_lr": 0.0019061860340072474, "train_min_lr": 0.0019061860340072474, "train_loss": 0.2755055770260067, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007559257734101266, "epoch": 667, "n_parameters": 303924416} {"train_lr": 0.001903339711564228, "train_min_lr": 0.001903339711564228, "train_loss": 0.2757164908793922, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007916897181600619, "epoch": 668, "n_parameters": 303924416} {"train_lr": 0.0019004918340189175, "train_min_lr": 0.0019004918340189175, "train_loss": 0.2756402365928993, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007737227158847814, "epoch": 669, "n_parameters": 303924416} {"train_lr": 0.0018976424124893226, "train_min_lr": 0.0018976424124893226, "train_loss": 0.2755650149192661, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007635841187901604, "epoch": 670, "n_parameters": 303924416} {"train_lr": 0.0018947914580994662, "train_min_lr": 0.0018947914580994662, "train_loss": 0.2755142171652271, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007582876053698457, "epoch": 671, "n_parameters": 303924416} {"train_lr": 0.0018919389819793608, "train_min_lr": 0.0018919389819793608, "train_loss": 0.27549850150871164, "train_loss_scale": 67216.41025641025, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 672, "n_parameters": 303924416} {"train_lr": 0.0018890849952649664, "train_min_lr": 0.0018890849952649664, "train_loss": 0.275496445196227, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007629611751793, "epoch": 673, "n_parameters": 303924416} {"train_lr": 0.0018862295090981218, "train_min_lr": 0.0018862295090981218, "train_loss": 0.27553095125581306, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007674962000097507, "epoch": 674, "n_parameters": 303924416} {"train_lr": 0.0018833725346265372, "train_min_lr": 0.0018833725346265372, "train_loss": 0.2755503875758642, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007713371120655957, "epoch": 675, "n_parameters": 303924416} {"train_lr": 0.0018805140830037252, "train_min_lr": 0.0018805140830037252, "train_loss": 0.27544535600986236, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0075948090570119135, "epoch": 676, "n_parameters": 303924416} {"train_lr": 0.001877654165388965, "train_min_lr": 0.001877654165388965, "train_loss": 0.27536801687585044, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007587941796280062, "epoch": 677, "n_parameters": 303924416} {"train_lr": 0.001874792792947265, "train_min_lr": 0.001874792792947265, "train_loss": 0.2753695611668846, "train_loss_scale": 86961.23076923077, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007585414231587679, "epoch": 678, "n_parameters": 303924416} {"train_lr": 0.0018719299768493057, "train_min_lr": 0.0018719299768493057, "train_loss": 0.27550812364400673, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007775568545091515, "epoch": 679, "n_parameters": 303924416} {"train_lr": 0.0018690657282714014, "train_min_lr": 0.0018690657282714014, "train_loss": 0.2753040833900181, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007505666136216276, "epoch": 680, "n_parameters": 303924416} {"train_lr": 0.0018662000583954724, "train_min_lr": 0.0018662000583954724, "train_loss": 0.275416189733033, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00774314926746182, "epoch": 681, "n_parameters": 303924416} {"train_lr": 0.0018633329784089752, "train_min_lr": 0.0018633329784089752, "train_loss": 0.27551425639420557, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007824100688314782, "epoch": 682, "n_parameters": 303924416} {"train_lr": 0.0018604644995048785, "train_min_lr": 0.0018604644995048785, "train_loss": 0.2754621968144933, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007722119020166783, "epoch": 683, "n_parameters": 303924416} {"train_lr": 0.0018575946328816017, "train_min_lr": 0.0018575946328816017, "train_loss": 0.27536460391890544, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0076569009718532935, "epoch": 684, "n_parameters": 303924416} {"train_lr": 0.0018547233897429978, "train_min_lr": 0.0018547233897429978, "train_loss": 0.2753558125442419, "train_loss_scale": 251221.33333333334, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007673023428278378, "epoch": 685, "n_parameters": 303924416} {"train_lr": 0.0018518507812982775, "train_min_lr": 0.0018518507812982775, "train_loss": 0.27526694195619666, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00764950881831539, "epoch": 686, "n_parameters": 303924416} {"train_lr": 0.0018489768187619955, "train_min_lr": 0.0018489768187619955, "train_loss": 0.2753084658239132, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007695277261798485, "epoch": 687, "n_parameters": 303924416} {"train_lr": 0.0018461015133539854, "train_min_lr": 0.0018461015133539854, "train_loss": 0.27521972639713055, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0075759122100396035, "epoch": 688, "n_parameters": 303924416} {"train_lr": 0.0018432248762993175, "train_min_lr": 0.0018432248762993175, "train_loss": 0.2752180496553102, "train_loss_scale": 157118.35897435897, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 689, "n_parameters": 303924416} {"train_lr": 0.0018403469188282779, "train_min_lr": 0.0018403469188282779, "train_loss": 0.2752130594438849, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007654227568612744, "epoch": 690, "n_parameters": 303924416} {"train_lr": 0.0018374676521762895, "train_min_lr": 0.0018374676521762895, "train_loss": 0.2752200394946461, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007682707386079411, "epoch": 691, "n_parameters": 303924416} {"train_lr": 0.0018345870875838982, "train_min_lr": 0.0018345870875838982, "train_loss": 0.2754172771625842, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0078966043328341, "epoch": 692, "n_parameters": 303924416} {"train_lr": 0.0018317052362967102, "train_min_lr": 0.0018317052362967102, "train_loss": 0.2752494665656764, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007663307278655851, "epoch": 693, "n_parameters": 303924416} {"train_lr": 0.0018288221095653606, "train_min_lr": 0.0018288221095653606, "train_loss": 0.2751832429934532, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00762343341156315, "epoch": 694, "n_parameters": 303924416} {"train_lr": 0.0018259377186454588, "train_min_lr": 0.0018259377186454588, "train_loss": 0.2751870196318636, "train_loss_scale": 182324.5128205128, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007691518194639148, "epoch": 695, "n_parameters": 303924416} {"train_lr": 0.0018230520747975509, "train_min_lr": 0.0018230520747975509, "train_loss": 0.27510582194484484, "train_loss_scale": 237778.05128205128, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 696, "n_parameters": 303924416} {"train_lr": 0.0018201651892870796, "train_min_lr": 0.0018201651892870796, "train_loss": 0.2751611045573671, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007812934079900002, "epoch": 697, "n_parameters": 303924416} {"train_lr": 0.0018172770733843224, "train_min_lr": 0.0018172770733843224, "train_loss": 0.275142926012333, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007664071582556249, "epoch": 698, "n_parameters": 303924416} {"train_lr": 0.0018143877383643727, "train_min_lr": 0.0018143877383643727, "train_loss": 0.2750722442049151, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00761876074083818, "epoch": 699, "n_parameters": 303924416} {"train_lr": 0.0018086054560970055, "train_min_lr": 0.0018086054560970055, "train_loss": 0.2750068510351225, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007657166979371164, "epoch": 701, "n_parameters": 303924416} {"train_lr": 0.0018057125314233852, "train_min_lr": 0.0018057125314233852, "train_loss": 0.275125049550134, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0077888765043985005, "epoch": 702, "n_parameters": 303924416} {"train_lr": 0.00180281843278008, "train_min_lr": 0.00180281843278008, "train_loss": 0.2750303389158291, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0076131312660156535, "epoch": 703, "n_parameters": 303924416} {"train_lr": 0.0017999231714655396, "train_min_lr": 0.0017999231714655396, "train_loss": 0.27502089536760765, "train_loss_scale": 232736.8205128205, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007741807174916642, "epoch": 704, "n_parameters": 303924416} {"train_lr": 0.0017970267587827415, "train_min_lr": 0.0017970267587827415, "train_loss": 0.27503970628663993, "train_loss_scale": 131912.20512820513, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 705, "n_parameters": 303924416} {"train_lr": 0.0017941292060391677, "train_min_lr": 0.0017941292060391677, "train_loss": 0.275031378868824, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007709443803506497, "epoch": 706, "n_parameters": 303924416} {"train_lr": 0.001791230524546753, "train_min_lr": 0.001791230524546753, "train_loss": 0.27507445744823855, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007741596245278533, "epoch": 707, "n_parameters": 303924416} {"train_lr": 0.0017883307256218244, "train_min_lr": 0.0017883307256218244, "train_loss": 0.27493662654589385, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007625095888029019, "epoch": 708, "n_parameters": 303924416} {"train_lr": 0.001785429820585086, "train_min_lr": 0.001785429820585086, "train_loss": 0.2749764331861232, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007701226439959823, "epoch": 709, "n_parameters": 303924416} {"train_lr": 0.0017825278207615554, "train_min_lr": 0.0017825278207615554, "train_loss": 0.2749253298030593, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007690111872178908, "epoch": 710, "n_parameters": 303924416} {"train_lr": 0.0017796247374805261, "train_min_lr": 0.0017796247374805261, "train_loss": 0.27493132597122055, "train_loss_scale": 207530.66666666666, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007695113083359619, "epoch": 711, "n_parameters": 303924416} {"train_lr": 0.0017767205820755137, "train_min_lr": 0.0017767205820755137, "train_loss": 0.2749028220916024, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007698350605399658, "epoch": 712, "n_parameters": 303924416} {"train_lr": 0.0017738153658842265, "train_min_lr": 0.0017738153658842265, "train_loss": 0.27502027183221894, "train_loss_scale": 145775.58974358975, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 713, "n_parameters": 303924416} {"train_lr": 0.0017709091002485128, "train_min_lr": 0.0017709091002485128, "train_loss": 0.2749410132244707, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0077742544802813195, "epoch": 714, "n_parameters": 303924416} {"train_lr": 0.0017680017965143165, "train_min_lr": 0.0017680017965143165, "train_loss": 0.27503649561773413, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007884162321436005, "epoch": 715, "n_parameters": 303924416} {"train_lr": 0.001765093466031638, "train_min_lr": 0.001765093466031638, "train_loss": 0.2749439253418062, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007756671205592843, "epoch": 716, "n_parameters": 303924416} {"train_lr": 0.0017621841201544786, "train_min_lr": 0.0017621841201544786, "train_loss": 0.27487779785998356, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0077741616166746, "epoch": 717, "n_parameters": 303924416} {"train_lr": 0.0017592737702408106, "train_min_lr": 0.0017592737702408106, "train_loss": 0.27494618461992687, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007896127162847476, "epoch": 718, "n_parameters": 303924416} {"train_lr": 0.001756362427652523, "train_min_lr": 0.001756362427652523, "train_loss": 0.2749173443806238, "train_loss_scale": 193667.28205128206, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0077829166757277185, "epoch": 719, "n_parameters": 303924416} {"train_lr": 0.0017505368099189806, "train_min_lr": 0.0017505368099189806, "train_loss": 0.27496346682369804, "train_loss_scale": 257942.97435897434, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 721, "n_parameters": 303924416} {"train_lr": 0.0017476225575167052, "train_min_lr": 0.0017476225575167052, "train_loss": 0.2748480756223823, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0077579419892758895, "epoch": 722, "n_parameters": 303924416} {"train_lr": 0.0017447073579256777, "train_min_lr": 0.0017447073579256777, "train_loss": 0.2748895204500653, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007908091084876409, "epoch": 723, "n_parameters": 303924416} {"train_lr": 0.0017417912225267211, "train_min_lr": 0.0017417912225267211, "train_loss": 0.2748533434604701, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007853438703116411, "epoch": 724, "n_parameters": 303924416} {"train_lr": 0.0017388741627043104, "train_min_lr": 0.0017388741627043104, "train_loss": 0.27483502712148505, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00790090920128979, "epoch": 725, "n_parameters": 303924416} {"train_lr": 0.0017359561898465316, "train_min_lr": 0.0017359561898465316, "train_loss": 0.2749497128549056, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007947247030619437, "epoch": 726, "n_parameters": 303924416} {"train_lr": 0.0017330373153450322, "train_min_lr": 0.0017330373153450322, "train_loss": 0.27478484934172, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007778688572729245, "epoch": 727, "n_parameters": 303924416} {"train_lr": 0.001730117550594988, "train_min_lr": 0.001730117550594988, "train_loss": 0.2747420286908985, "train_loss_scale": 212571.89743589744, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007759326940569549, "epoch": 728, "n_parameters": 303924416} {"train_lr": 0.0017271969069950334, "train_min_lr": 0.0017271969069950334, "train_loss": 0.2747152174303595, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007852581198303362, "epoch": 729, "n_parameters": 303924416} {"train_lr": 0.001724275395947252, "train_min_lr": 0.001724275395947252, "train_loss": 0.27476235111363423, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007814045858163482, "epoch": 730, "n_parameters": 303924416} {"train_lr": 0.0017213530288571037, "train_min_lr": 0.0017213530288571037, "train_loss": 0.2748442915393613, "train_loss_scale": 195767.79487179487, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 731, "n_parameters": 303924416} {"train_lr": 0.0017184298171333916, "train_min_lr": 0.0017184298171333916, "train_loss": 0.27470202883108497, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007803463626306695, "epoch": 732, "n_parameters": 303924416} {"train_lr": 0.0017155057721882157, "train_min_lr": 0.0017155057721882157, "train_loss": 0.27464384107719153, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007770713427677178, "epoch": 733, "n_parameters": 303924416} {"train_lr": 0.0017125809054369357, "train_min_lr": 0.0017125809054369357, "train_loss": 0.27466363637135005, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007844593207268283, "epoch": 734, "n_parameters": 303924416} {"train_lr": 0.0017096552282981094, "train_min_lr": 0.0017096552282981094, "train_loss": 0.27463039085089874, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00783206394706399, "epoch": 735, "n_parameters": 303924416} {"train_lr": 0.001706728752193467, "train_min_lr": 0.001706728752193467, "train_loss": 0.27463788252610427, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007842103220844785, "epoch": 736, "n_parameters": 303924416} {"train_lr": 0.0017038014885478482, "train_min_lr": 0.0017038014885478482, "train_loss": 0.2747027021832764, "train_loss_scale": 143675.07692307694, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007915506911022255, "epoch": 737, "n_parameters": 303924416} {"train_lr": 0.0017008734487891795, "train_min_lr": 0.0017008734487891795, "train_loss": 0.27473110331294054, "train_loss_scale": 212992.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 738, "n_parameters": 303924416} {"train_lr": 0.0016979446443484076, "train_min_lr": 0.0016979446443484076, "train_loss": 0.27463946869680417, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007844197901132971, "epoch": 739, "n_parameters": 303924416} {"train_lr": 0.0016950150866594704, "train_min_lr": 0.0016950150866594704, "train_loss": 0.27458059557606107, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007803912402894826, "epoch": 740, "n_parameters": 303924416} {"train_lr": 0.0016920847871592394, "train_min_lr": 0.0016920847871592394, "train_loss": 0.274572461983786, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007851701114887897, "epoch": 741, "n_parameters": 303924416} {"train_lr": 0.001689153757287491, "train_min_lr": 0.001689153757287491, "train_loss": 0.2745291720508622, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00780239773209756, "epoch": 742, "n_parameters": 303924416} {"train_lr": 0.0016862220084868456, "train_min_lr": 0.0016862220084868456, "train_loss": 0.27457297059635705, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007943235979510996, "epoch": 743, "n_parameters": 303924416} {"train_lr": 0.0016832895522027335, "train_min_lr": 0.0016832895522027335, "train_loss": 0.27456561737777424, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007894004219008658, "epoch": 744, "n_parameters": 303924416} {"train_lr": 0.001680356399883348, "train_min_lr": 0.001680356399883348, "train_loss": 0.2746270433223496, "train_loss_scale": 257522.87179487178, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00802814561384133, "epoch": 745, "n_parameters": 303924416} {"train_lr": 0.001677422562979598, "train_min_lr": 0.001677422562979598, "train_loss": 0.2744995080686819, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007865070386264378, "epoch": 746, "n_parameters": 303924416} {"train_lr": 0.0016744880529450644, "train_min_lr": 0.0016744880529450644, "train_loss": 0.27460404852023107, "train_loss_scale": 205010.05128205128, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 747, "n_parameters": 303924416} {"train_lr": 0.0016715528812359585, "train_min_lr": 0.0016715528812359585, "train_loss": 0.2745503179806595, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007980267781441888, "epoch": 748, "n_parameters": 303924416} {"train_lr": 0.0016686170593110696, "train_min_lr": 0.0016686170593110696, "train_loss": 0.2746448578217473, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00816731931319317, "epoch": 749, "n_parameters": 303924416} {"train_lr": 0.0016656805986317326, "train_min_lr": 0.0016656805986317326, "train_loss": 0.2745123346819757, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007907073535860922, "epoch": 750, "n_parameters": 303924416} {"train_lr": 0.001662743510661771, "train_min_lr": 0.001662743510661771, "train_loss": 0.2745044944891467, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007976230748821624, "epoch": 751, "n_parameters": 303924416} {"train_lr": 0.0016598058068674597, "train_min_lr": 0.0016598058068674597, "train_loss": 0.2745367409900213, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008000533266637761, "epoch": 752, "n_parameters": 303924416} {"train_lr": 0.0016568674987174749, "train_min_lr": 0.0016568674987174749, "train_loss": 0.2745823660949006, "train_loss_scale": 134432.8205128205, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00809923858823589, "epoch": 753, "n_parameters": 303924416} {"train_lr": 0.001653928597682856, "train_min_lr": 0.001653928597682856, "train_loss": 0.2745067202844299, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008020647265649855, "epoch": 754, "n_parameters": 303924416} {"train_lr": 0.0016509891152369522, "train_min_lr": 0.0016509891152369522, "train_loss": 0.2744494179347291, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007958625699990453, "epoch": 755, "n_parameters": 303924416} {"train_lr": 0.0016480490628553886, "train_min_lr": 0.0016480490628553886, "train_loss": 0.274415180633346, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00795318966713519, "epoch": 756, "n_parameters": 303924416} {"train_lr": 0.0016451084520160096, "train_min_lr": 0.0016451084520160096, "train_loss": 0.274438868679751, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00805787431846301, "epoch": 757, "n_parameters": 303924416} {"train_lr": 0.001642167294198843, "train_min_lr": 0.001642167294198843, "train_loss": 0.27443265901782954, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008018619312236134, "epoch": 758, "n_parameters": 303924416} {"train_lr": 0.0016392256008860519, "train_min_lr": 0.0016392256008860519, "train_loss": 0.27449510514270514, "train_loss_scale": 238198.15384615384, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 759, "n_parameters": 303924416} {"train_lr": 0.0016362833835618874, "train_min_lr": 0.0016362833835618874, "train_loss": 0.2743927085509476, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007960340302162923, "epoch": 760, "n_parameters": 303924416} {"train_lr": 0.00163334065371265, "train_min_lr": 0.00163334065371265, "train_loss": 0.2743956613330505, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008037357317964332, "epoch": 761, "n_parameters": 303924416} {"train_lr": 0.0016303974228266398, "train_min_lr": 0.0016303974228266398, "train_loss": 0.27439411293763, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008057913290730748, "epoch": 762, "n_parameters": 303924416} {"train_lr": 0.0016274537023941095, "train_min_lr": 0.0016274537023941095, "train_loss": 0.2744449522429838, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008066785242707014, "epoch": 763, "n_parameters": 303924416} {"train_lr": 0.0016245095039072266, "train_min_lr": 0.0016245095039072266, "train_loss": 0.2742867269612944, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007969889821162304, "epoch": 764, "n_parameters": 303924416} {"train_lr": 0.0016215648388600266, "train_min_lr": 0.0016215648388600266, "train_loss": 0.274357081268532, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00815137465729211, "epoch": 765, "n_parameters": 303924416} {"train_lr": 0.001618619718748361, "train_min_lr": 0.001618619718748361, "train_loss": 0.27426474697840136, "train_loss_scale": 232316.71794871794, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007988285568829339, "epoch": 766, "n_parameters": 303924416} {"train_lr": 0.0016156741550698618, "train_min_lr": 0.0016156741550698618, "train_loss": 0.27430006868253726, "train_loss_scale": 158798.76923076922, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 767, "n_parameters": 303924416} {"train_lr": 0.0016127281593238927, "train_min_lr": 0.0016127281593238927, "train_loss": 0.27439184763277763, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008150898220125012, "epoch": 768, "n_parameters": 303924416} {"train_lr": 0.0016097817430115046, "train_min_lr": 0.0016097817430115046, "train_loss": 0.27425499607880527, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008053015775751704, "epoch": 769, "n_parameters": 303924416} {"train_lr": 0.0016068349176353834, "train_min_lr": 0.0016068349176353834, "train_loss": 0.27439428985309905, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008324102300087897, "epoch": 770, "n_parameters": 303924416} {"train_lr": 0.001603887694699822, "train_min_lr": 0.001603887694699822, "train_loss": 0.27431529741256666, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008063299529170856, "epoch": 771, "n_parameters": 303924416} {"train_lr": 0.0016009400857106583, "train_min_lr": 0.0016009400857106583, "train_loss": 0.2742305904155215, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.007966164230870513, "epoch": 772, "n_parameters": 303924416} {"train_lr": 0.0015979921021752421, "train_min_lr": 0.0015979921021752421, "train_loss": 0.2741065278816491, "train_loss_scale": 180644.10256410256, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00792350955098533, "epoch": 773, "n_parameters": 303924416} {"train_lr": 0.001595043755602381, "train_min_lr": 0.001595043755602381, "train_loss": 0.27424746917453235, "train_loss_scale": 172242.05128205128, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 774, "n_parameters": 303924416} {"train_lr": 0.001592095057502303, "train_min_lr": 0.001592095057502303, "train_loss": 0.27426953388091463, "train_loss_scale": 82970.2564102564, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 775, "n_parameters": 303924416} {"train_lr": 0.0015891460193866087, "train_min_lr": 0.0015891460193866087, "train_loss": 0.2742877573443529, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008270859185498781, "epoch": 776, "n_parameters": 303924416} {"train_lr": 0.0015861966527682212, "train_min_lr": 0.0015861966527682212, "train_loss": 0.2742329652283866, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008218428187179737, "epoch": 777, "n_parameters": 303924416} {"train_lr": 0.0015832469691613538, "train_min_lr": 0.0015832469691613538, "train_loss": 0.27427206154411227, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008255101131418576, "epoch": 778, "n_parameters": 303924416} {"train_lr": 0.00158029698008145, "train_min_lr": 0.00158029698008145, "train_loss": 0.2742008987085846, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008150081656192644, "epoch": 779, "n_parameters": 303924416} {"train_lr": 0.0015773466970451506, "train_min_lr": 0.0015773466970451506, "train_loss": 0.2741329634938246, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008091159660416918, "epoch": 780, "n_parameters": 303924416} {"train_lr": 0.0015743961315702415, "train_min_lr": 0.0015743961315702415, "train_loss": 0.27416650047346663, "train_loss_scale": 86751.17948717948, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008191007446163358, "epoch": 781, "n_parameters": 303924416} {"train_lr": 0.001571445295175614, "train_min_lr": 0.001571445295175614, "train_loss": 0.2741834230106085, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00816087188342443, "epoch": 782, "n_parameters": 303924416} {"train_lr": 0.0015684941993812142, "train_min_lr": 0.0015684941993812142, "train_loss": 0.27414019402259815, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008138537503934154, "epoch": 783, "n_parameters": 303924416} {"train_lr": 0.0015655428557080017, "train_min_lr": 0.0015655428557080017, "train_loss": 0.2740773516169821, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008125107541369895, "epoch": 784, "n_parameters": 303924416} {"train_lr": 0.0015625912756779038, "train_min_lr": 0.0015625912756779038, "train_loss": 0.27413901276826763, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008180538040155975, "epoch": 785, "n_parameters": 303924416} {"train_lr": 0.0015596394708137726, "train_min_lr": 0.0015596394708137726, "train_loss": 0.2740691499635338, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008155781877203247, "epoch": 786, "n_parameters": 303924416} {"train_lr": 0.0015566874526393342, "train_min_lr": 0.0015566874526393342, "train_loss": 0.27411765455363846, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008256818945459926, "epoch": 787, "n_parameters": 303924416} {"train_lr": 0.0015537352326791501, "train_min_lr": 0.0015537352326791501, "train_loss": 0.27412081252222353, "train_loss_scale": 250801.23076923078, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00824158316782604, "epoch": 788, "n_parameters": 303924416} {"train_lr": 0.0015507828224585658, "train_min_lr": 0.0015507828224585658, "train_loss": 0.27401930370177024, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008137460484599264, "epoch": 789, "n_parameters": 303924416} {"train_lr": 0.0015478302335036798, "train_min_lr": 0.0015478302335036798, "train_loss": 0.2739320643729745, "train_loss_scale": 235677.53846153847, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 790, "n_parameters": 303924416} {"train_lr": 0.0015448774773412749, "train_min_lr": 0.0015448774773412749, "train_loss": 0.27397781156170636, "train_loss_scale": 95783.38461538461, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 791, "n_parameters": 303924416} {"train_lr": 0.001541924565498795, "train_min_lr": 0.001541924565498795, "train_loss": 0.2739541618607174, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008077548113830674, "epoch": 792, "n_parameters": 303924416} {"train_lr": 0.0015389715095042898, "train_min_lr": 0.0015389715095042898, "train_loss": 0.2738610633696692, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008093379542398721, "epoch": 793, "n_parameters": 303924416} {"train_lr": 0.0015360183208863727, "train_min_lr": 0.0015360183208863727, "train_loss": 0.27399821700946164, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008258708051214807, "epoch": 794, "n_parameters": 303924416} {"train_lr": 0.0015330650111741698, "train_min_lr": 0.0015330650111741698, "train_loss": 0.2739032693834116, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008148645071718747, "epoch": 795, "n_parameters": 303924416} {"train_lr": 0.0015301115918972897, "train_min_lr": 0.0015301115918972897, "train_loss": 0.27389913828721124, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008183555529775241, "epoch": 796, "n_parameters": 303924416} {"train_lr": 0.001527158074585758, "train_min_lr": 0.001527158074585758, "train_loss": 0.2738478383652341, "train_loss_scale": 73938.05128205128, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008153952631311348, "epoch": 797, "n_parameters": 303924416} {"train_lr": 0.0015242044707699904, "train_min_lr": 0.0015242044707699904, "train_loss": 0.27387165552220094, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008191267274714146, "epoch": 798, "n_parameters": 303924416} {"train_lr": 0.0015212507919807395, "train_min_lr": 0.0015212507919807395, "train_loss": 0.2738883883859485, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008183548595601072, "epoch": 799, "n_parameters": 303924416} {"train_lr": 0.0015182970497490454, "train_min_lr": 0.0015182970497490454, "train_loss": 0.2739643119753171, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00845014626154294, "epoch": 800, "n_parameters": 303924416} {"train_lr": 0.0015153432556062026, "train_min_lr": 0.0015153432556062026, "train_loss": 0.2739391863966982, "train_loss_scale": 81499.89743589744, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 801, "n_parameters": 303924416} {"train_lr": 0.0015123894210837038, "train_min_lr": 0.0015123894210837038, "train_loss": 0.2738243444637658, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008195167879024759, "epoch": 802, "n_parameters": 303924416} {"train_lr": 0.0015094355577131994, "train_min_lr": 0.0015094355577131994, "train_loss": 0.273774485100801, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008124685041809406, "epoch": 803, "n_parameters": 303924416} {"train_lr": 0.0015064816770264577, "train_min_lr": 0.0015064816770264577, "train_loss": 0.27379282181246734, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008199674786271479, "epoch": 804, "n_parameters": 303924416} {"train_lr": 0.0015035277905553067, "train_min_lr": 0.0015035277905553067, "train_loss": 0.273768287184887, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008191516581110848, "epoch": 805, "n_parameters": 303924416} {"train_lr": 0.0015005739098316025, "train_min_lr": 0.0015005739098316025, "train_loss": 0.2737181988508942, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00817857805538015, "epoch": 806, "n_parameters": 303924416} {"train_lr": 0.001497620046387179, "train_min_lr": 0.001497620046387179, "train_loss": 0.2737769308195521, "train_loss_scale": 88221.53846153847, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008271194366105378, "epoch": 807, "n_parameters": 303924416} {"train_lr": 0.001494666211753796, "train_min_lr": 0.001494666211753796, "train_loss": 0.2738193152412677, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008311093483382884, "epoch": 808, "n_parameters": 303924416} {"train_lr": 0.0014917124174631104, "train_min_lr": 0.0014917124174631104, "train_loss": 0.27383318593582284, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008454662696936, "epoch": 809, "n_parameters": 303924416} {"train_lr": 0.001488758675046614, "train_min_lr": 0.001488758675046614, "train_loss": 0.27370771738246846, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00827273573183144, "epoch": 810, "n_parameters": 303924416} {"train_lr": 0.0014858049960356009, "train_min_lr": 0.0014858049960356009, "train_loss": 0.27372317602380347, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008372245183310065, "epoch": 811, "n_parameters": 303924416} {"train_lr": 0.0014828513919611134, "train_min_lr": 0.0014828513919611134, "train_loss": 0.2736737968829962, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008165444953379054, "epoch": 812, "n_parameters": 303924416} {"train_lr": 0.0014798978743539074, "train_min_lr": 0.0014798978743539074, "train_loss": 0.2736725074370416, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008246295851094123, "epoch": 813, "n_parameters": 303924416} {"train_lr": 0.001476944454744393, "train_min_lr": 0.001476944454744393, "train_loss": 0.27358242131184596, "train_loss_scale": 158798.76923076922, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 814, "n_parameters": 303924416} {"train_lr": 0.0014739911446626046, "train_min_lr": 0.0014739911446626046, "train_loss": 0.2735983620708187, "train_loss_scale": 67636.51282051283, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 815, "n_parameters": 303924416} {"train_lr": 0.0014710379556381466, "train_min_lr": 0.0014710379556381466, "train_loss": 0.27360984290209717, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008298536622299789, "epoch": 816, "n_parameters": 303924416} {"train_lr": 0.001468084899200151, "train_min_lr": 0.001468084899200151, "train_loss": 0.2736662176873487, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008448572009980965, "epoch": 817, "n_parameters": 303924416} {"train_lr": 0.0014651319868772296, "train_min_lr": 0.0014651319868772296, "train_loss": 0.2736130826599084, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008352835695498073, "epoch": 818, "n_parameters": 303924416} {"train_lr": 0.001462179230197436, "train_min_lr": 0.001462179230197436, "train_loss": 0.27355477489972824, "train_loss_scale": 59024.41025641026, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 819, "n_parameters": 303924416} {"train_lr": 0.0014592266406882124, "train_min_lr": 0.0014592266406882124, "train_loss": 0.27360915110553974, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008385884765093813, "epoch": 820, "n_parameters": 303924416} {"train_lr": 0.0014562742298763521, "train_min_lr": 0.0014562742298763521, "train_loss": 0.27361402944482577, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008471669093407214, "epoch": 821, "n_parameters": 303924416} {"train_lr": 0.0014533220092879473, "train_min_lr": 0.0014533220092879473, "train_loss": 0.2735773858327705, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008281760819208546, "epoch": 822, "n_parameters": 303924416} {"train_lr": 0.0014503699904483498, "train_min_lr": 0.0014503699904483498, "train_loss": 0.2735233495477587, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00831329226732636, "epoch": 823, "n_parameters": 303924416} {"train_lr": 0.0014474181848821218, "train_min_lr": 0.0014474181848821218, "train_loss": 0.2734993436009599, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008302346916678242, "epoch": 824, "n_parameters": 303924416} {"train_lr": 0.0014444666041129952, "train_min_lr": 0.0014444666041129952, "train_loss": 0.27348984021824807, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008338006198382339, "epoch": 825, "n_parameters": 303924416} {"train_lr": 0.0014415152596638215, "train_min_lr": 0.0014415152596638215, "train_loss": 0.27346888766325533, "train_loss_scale": 58604.307692307695, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008451900449402344, "epoch": 826, "n_parameters": 303924416} {"train_lr": 0.0014385641630565349, "train_min_lr": 0.0014385641630565349, "train_loss": 0.2735647247172892, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00844929195367373, "epoch": 827, "n_parameters": 303924416} {"train_lr": 0.001435613325812093, "train_min_lr": 0.001435613325812093, "train_loss": 0.273504243294995, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008430432562584965, "epoch": 828, "n_parameters": 303924416} {"train_lr": 0.001432662759450452, "train_min_lr": 0.001432662759450452, "train_loss": 0.2734979898400175, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008400503389286594, "epoch": 829, "n_parameters": 303924416} {"train_lr": 0.0014297124754905023, "train_min_lr": 0.0014297124754905023, "train_loss": 0.27350569548956954, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008489007146682782, "epoch": 830, "n_parameters": 303924416} {"train_lr": 0.0014267624854500333, "train_min_lr": 0.0014267624854500333, "train_loss": 0.2734691696181798, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008457583669298448, "epoch": 831, "n_parameters": 303924416} {"train_lr": 0.0014238128008456877, "train_min_lr": 0.0014238128008456877, "train_loss": 0.2734530776864491, "train_loss_scale": 90322.05128205128, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00847272689973649, "epoch": 832, "n_parameters": 303924416} {"train_lr": 0.00142086343319292, "train_min_lr": 0.00142086343319292, "train_loss": 0.2734491462902858, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008509223379242497, "epoch": 833, "n_parameters": 303924416} {"train_lr": 0.0014179143940059404, "train_min_lr": 0.0014179143940059404, "train_loss": 0.2734345438418528, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008463835639234345, "epoch": 834, "n_parameters": 303924416} {"train_lr": 0.001414965694797677, "train_min_lr": 0.001414965694797677, "train_loss": 0.27345676921928924, "train_loss_scale": 114688.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 835, "n_parameters": 303924416} {"train_lr": 0.00141201734707974, "train_min_lr": 0.00141201734707974, "train_loss": 0.2733825568485862, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008470772608863907, "epoch": 836, "n_parameters": 303924416} {"train_lr": 0.0014090693623623558, "train_min_lr": 0.0014090693623623558, "train_loss": 0.2733870127942795, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008506694472788904, "epoch": 837, "n_parameters": 303924416} {"train_lr": 0.001406121752154341, "train_min_lr": 0.001406121752154341, "train_loss": 0.2733862338324961, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008469331906571124, "epoch": 838, "n_parameters": 303924416} {"train_lr": 0.0014031745279630477, "train_min_lr": 0.0014031745279630477, "train_loss": 0.2734108915319666, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008536955079016013, "epoch": 839, "n_parameters": 303924416} {"train_lr": 0.0014002277012943208, "train_min_lr": 0.0014002277012943208, "train_loss": 0.2735454115646485, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008778814483099641, "epoch": 840, "n_parameters": 303924416} {"train_lr": 0.0013972812836524518, "train_min_lr": 0.0013972812836524518, "train_loss": 0.2733614085486923, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008533424712741414, "epoch": 841, "n_parameters": 303924416} {"train_lr": 0.0013943352865401412, "train_min_lr": 0.0013943352865401412, "train_loss": 0.2733412855871929, "train_loss_scale": 120569.43589743589, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008499972384948378, "epoch": 842, "n_parameters": 303924416} {"train_lr": 0.0013913897214584403, "train_min_lr": 0.0013913897214584403, "train_loss": 0.2733879990917511, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008705696182098622, "epoch": 843, "n_parameters": 303924416} {"train_lr": 0.0013884445999067208, "train_min_lr": 0.0013884445999067208, "train_loss": 0.2734146181923839, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008567863542479105, "epoch": 844, "n_parameters": 303924416} {"train_lr": 0.0013854999333826144, "train_min_lr": 0.0013854999333826144, "train_loss": 0.2734255700802001, "train_loss_scale": 116578.46153846153, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 845, "n_parameters": 303924416} {"train_lr": 0.0013825557333819853, "train_min_lr": 0.0013825557333819853, "train_loss": 0.27327872206194276, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008455365367878515, "epoch": 846, "n_parameters": 303924416} {"train_lr": 0.0013796120113988711, "train_min_lr": 0.0013796120113988711, "train_loss": 0.27320981997017485, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008412453992208704, "epoch": 847, "n_parameters": 303924416} {"train_lr": 0.001376668778925445, "train_min_lr": 0.001376668778925445, "train_loss": 0.27314631998813593, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00837303456127978, "epoch": 848, "n_parameters": 303924416} {"train_lr": 0.0013737260474519669, "train_min_lr": 0.0013737260474519669, "train_loss": 0.2732026756394846, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008600071717340212, "epoch": 849, "n_parameters": 303924416} {"train_lr": 0.0013707838284667446, "train_min_lr": 0.0013707838284667446, "train_loss": 0.2731469252642292, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008425223648560066, "epoch": 850, "n_parameters": 303924416} {"train_lr": 0.0013678421334560834, "train_min_lr": 0.0013678421334560834, "train_loss": 0.27330643395618653, "train_loss_scale": 39804.717948717946, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 851, "n_parameters": 303924416} {"train_lr": 0.0013649009739042414, "train_min_lr": 0.0013649009739042414, "train_loss": 0.2732695824830817, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008642308145331649, "epoch": 852, "n_parameters": 303924416} {"train_lr": 0.00136196036129339, "train_min_lr": 0.00136196036129339, "train_loss": 0.2731530354925407, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00850127388138133, "epoch": 853, "n_parameters": 303924416} {"train_lr": 0.0013590203071035607, "train_min_lr": 0.0013590203071035607, "train_loss": 0.27322276395399314, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008693123991505649, "epoch": 854, "n_parameters": 303924416} {"train_lr": 0.0013560808228126077, "train_min_lr": 0.0013560808228126077, "train_loss": 0.27314208193013495, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008454205056365866, "epoch": 855, "n_parameters": 303924416} {"train_lr": 0.0013531419198961616, "train_min_lr": 0.0013531419198961616, "train_loss": 0.2731202112134689, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00862640309899759, "epoch": 856, "n_parameters": 303924416} {"train_lr": 0.0013502036098275803, "train_min_lr": 0.0013502036098275803, "train_loss": 0.2731077818928334, "train_loss_scale": 45056.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008576855773870379, "epoch": 857, "n_parameters": 303924416} {"train_lr": 0.0013472659040779105, "train_min_lr": 0.0013472659040779105, "train_loss": 0.27321381154111946, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008761933946516365, "epoch": 858, "n_parameters": 303924416} {"train_lr": 0.0013443288141158347, "train_min_lr": 0.0013443288141158347, "train_loss": 0.27308517329406756, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008489338210963, "epoch": 859, "n_parameters": 303924416} {"train_lr": 0.0013413923514076363, "train_min_lr": 0.0013413923514076363, "train_loss": 0.27301541142738783, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008598439583483223, "epoch": 860, "n_parameters": 303924416} {"train_lr": 0.0013384565274171493, "train_min_lr": 0.0013384565274171493, "train_loss": 0.27299674896475595, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008535743602969421, "epoch": 861, "n_parameters": 303924416} {"train_lr": 0.001335521353605712, "train_min_lr": 0.001335521353605712, "train_loss": 0.27292805654593766, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008519025135916682, "epoch": 862, "n_parameters": 303924416} {"train_lr": 0.0013325868414321238, "train_min_lr": 0.0013325868414321238, "train_loss": 0.27299238637818074, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008573210450450484, "epoch": 863, "n_parameters": 303924416} {"train_lr": 0.0013296530023526054, "train_min_lr": 0.0013296530023526054, "train_loss": 0.27291378273198813, "train_loss_scale": 128761.43589743589, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008462307335904393, "epoch": 864, "n_parameters": 303924416} {"train_lr": 0.0013267198478207453, "train_min_lr": 0.0013267198478207453, "train_loss": 0.2729013278387869, "train_loss_scale": 77298.8717948718, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 865, "n_parameters": 303924416} {"train_lr": 0.0013237873892874622, "train_min_lr": 0.0013237873892874622, "train_loss": 0.27291790374781555, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008640135322004939, "epoch": 866, "n_parameters": 303924416} {"train_lr": 0.0013208556382009557, "train_min_lr": 0.0013208556382009557, "train_loss": 0.2728715465010072, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008568007167643653, "epoch": 867, "n_parameters": 303924416} {"train_lr": 0.0013179246060066679, "train_min_lr": 0.0013179246060066679, "train_loss": 0.27290842739733845, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008665436434034163, "epoch": 868, "n_parameters": 303924416} {"train_lr": 0.0013149943041472286, "train_min_lr": 0.0013149943041472286, "train_loss": 0.272968539624857, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008801725946772749, "epoch": 869, "n_parameters": 303924416} {"train_lr": 0.0013120647440624184, "train_min_lr": 0.0013120647440624184, "train_loss": 0.27297520724375945, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008770567511256116, "epoch": 870, "n_parameters": 303924416} {"train_lr": 0.0013091359371891224, "train_min_lr": 0.0013091359371891224, "train_loss": 0.27296213846271616, "train_loss_scale": 92422.56410256411, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008923869388989912, "epoch": 871, "n_parameters": 303924416} {"train_lr": 0.0013062078949612887, "train_min_lr": 0.0013062078949612887, "train_loss": 0.2729375486751684, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008765614096027536, "epoch": 872, "n_parameters": 303924416} {"train_lr": 0.0013032806288098726, "train_min_lr": 0.0013032806288098726, "train_loss": 0.27289791334754765, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008847267841562055, "epoch": 873, "n_parameters": 303924416} {"train_lr": 0.001300354150162807, "train_min_lr": 0.001300354150162807, "train_loss": 0.2728456122717128, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00866333021221157, "epoch": 874, "n_parameters": 303924416} {"train_lr": 0.0012974284704449469, "train_min_lr": 0.0012974284704449469, "train_loss": 0.2728110115007999, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008657359876311742, "epoch": 875, "n_parameters": 303924416} {"train_lr": 0.0012945036010780276, "train_min_lr": 0.0012945036010780276, "train_loss": 0.27278993643509847, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00865403680692021, "epoch": 876, "n_parameters": 303924416} {"train_lr": 0.0012915795534806228, "train_min_lr": 0.0012915795534806228, "train_loss": 0.27275571890342504, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008709393653612679, "epoch": 877, "n_parameters": 303924416} {"train_lr": 0.0012886563390680972, "train_min_lr": 0.0012886563390680972, "train_loss": 0.2728271995199462, "train_loss_scale": 197028.10256410256, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 878, "n_parameters": 303924416} {"train_lr": 0.0012857339692525628, "train_min_lr": 0.0012857339692525628, "train_loss": 0.2728631751378998, "train_loss_scale": 94523.07692307692, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 879, "n_parameters": 303924416} {"train_lr": 0.0012828124554428338, "train_min_lr": 0.0012828124554428338, "train_loss": 0.27283632583641565, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008809903538069474, "epoch": 880, "n_parameters": 303924416} {"train_lr": 0.0012798918090443838, "train_min_lr": 0.0012798918090443838, "train_loss": 0.2728437103408699, "train_loss_scale": 35183.58974358974, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 881, "n_parameters": 303924416} {"train_lr": 0.0012769720414592995, "train_min_lr": 0.0012769720414592995, "train_loss": 0.27289262142044324, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00896017445036425, "epoch": 882, "n_parameters": 303924416} {"train_lr": 0.0012740531640862353, "train_min_lr": 0.0012740531640862353, "train_loss": 0.27277792066944617, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008708999704163617, "epoch": 883, "n_parameters": 303924416} {"train_lr": 0.0012711351883203758, "train_min_lr": 0.0012711351883203758, "train_loss": 0.27271995942394894, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008710399840194255, "epoch": 884, "n_parameters": 303924416} {"train_lr": 0.0012682181255533767, "train_min_lr": 0.0012682181255533767, "train_loss": 0.2729065235531053, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009249234709363336, "epoch": 885, "n_parameters": 303924416} {"train_lr": 0.0012653019871733367, "train_min_lr": 0.0012653019871733367, "train_loss": 0.27279817366984505, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008801985875321314, "epoch": 886, "n_parameters": 303924416} {"train_lr": 0.0012623867845647414, "train_min_lr": 0.0012623867845647414, "train_loss": 0.2727789756519577, "train_loss_scale": 49677.1282051282, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008981116396637682, "epoch": 887, "n_parameters": 303924416} {"train_lr": 0.0012594725291084278, "train_min_lr": 0.0012594725291084278, "train_loss": 0.27275066890526944, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008939157102782374, "epoch": 888, "n_parameters": 303924416} {"train_lr": 0.0012565592321815298, "train_min_lr": 0.0012565592321815298, "train_loss": 0.27272197282395494, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008875840779346151, "epoch": 889, "n_parameters": 303924416} {"train_lr": 0.001253646905157445, "train_min_lr": 0.001253646905157445, "train_loss": 0.2726581600608113, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008697517207250573, "epoch": 890, "n_parameters": 303924416} {"train_lr": 0.0012507355594057786, "train_min_lr": 0.0012507355594057786, "train_loss": 0.27264575955147546, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008813127606677322, "epoch": 891, "n_parameters": 303924416} {"train_lr": 0.001247825206292309, "train_min_lr": 0.001247825206292309, "train_loss": 0.27258316217324674, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008834942488763958, "epoch": 892, "n_parameters": 303924416} {"train_lr": 0.0012449158571789395, "train_min_lr": 0.0012449158571789395, "train_loss": 0.272705145030975, "train_loss_scale": 72467.69230769231, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009113483453312745, "epoch": 893, "n_parameters": 303924416} {"train_lr": 0.0012420075234236513, "train_min_lr": 0.0012420075234236513, "train_loss": 0.2726951311312568, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008968142816462578, "epoch": 894, "n_parameters": 303924416} {"train_lr": 0.0012391002163804619, "train_min_lr": 0.0012391002163804619, "train_loss": 0.27264988477914953, "train_loss_scale": 103765.33333333333, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 895, "n_parameters": 303924416} {"train_lr": 0.0012361939473993834, "train_min_lr": 0.0012361939473993834, "train_loss": 0.272727502295031, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009132263829740576, "epoch": 896, "n_parameters": 303924416} {"train_lr": 0.0012332887278263727, "train_min_lr": 0.0012332887278263727, "train_loss": 0.27300974261612654, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009531892991314331, "epoch": 897, "n_parameters": 303924416} {"train_lr": 0.0012303845690032904, "train_min_lr": 0.0012303845690032904, "train_loss": 0.27263787912372023, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008930312926307894, "epoch": 898, "n_parameters": 303924416} {"train_lr": 0.0012274814822678575, "train_min_lr": 0.0012274814822678575, "train_loss": 0.27261058050386894, "train_loss_scale": 55978.666666666664, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 899, "n_parameters": 303924416} {"train_lr": 0.0012245794789536078, "train_min_lr": 0.0012245794789536078, "train_loss": 0.2725910370966467, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008950988654620372, "epoch": 900, "n_parameters": 303924416} {"train_lr": 0.0012216785703898449, "train_min_lr": 0.0012216785703898449, "train_loss": 0.27252186886751306, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00882148007169748, "epoch": 901, "n_parameters": 303924416} {"train_lr": 0.0012187787679016017, "train_min_lr": 0.0012187787679016017, "train_loss": 0.2724736620737717, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008808380715214672, "epoch": 902, "n_parameters": 303924416} {"train_lr": 0.001215880082809589, "train_min_lr": 0.001215880082809589, "train_loss": 0.27247824723151726, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008989583506869772, "epoch": 903, "n_parameters": 303924416} {"train_lr": 0.0012129825264301601, "train_min_lr": 0.0012129825264301601, "train_loss": 0.2726747508799562, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0091711115778591, "epoch": 904, "n_parameters": 303924416} {"train_lr": 0.0012100861100752568, "train_min_lr": 0.0012100861100752568, "train_loss": 0.2726446748621619, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009234302411548411, "epoch": 905, "n_parameters": 303924416} {"train_lr": 0.0012071908450523715, "train_min_lr": 0.0012071908450523715, "train_loss": 0.272613320645924, "train_loss_scale": 61650.05128205128, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009145430393684179, "epoch": 906, "n_parameters": 303924416} {"train_lr": 0.0012042967426645064, "train_min_lr": 0.0012042967426645064, "train_loss": 0.2726548421674241, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009388061843287105, "epoch": 907, "n_parameters": 303924416} {"train_lr": 0.0012014038142101181, "train_min_lr": 0.0012014038142101181, "train_loss": 0.2725362782784475, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00897024360557015, "epoch": 908, "n_parameters": 303924416} {"train_lr": 0.0011985120709830882, "train_min_lr": 0.0011985120709830882, "train_loss": 0.27243909563940877, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008875137618862284, "epoch": 909, "n_parameters": 303924416} {"train_lr": 0.0011956215242726605, "train_min_lr": 0.0011956215242726605, "train_loss": 0.27243575332566905, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008946717394372592, "epoch": 910, "n_parameters": 303924416} {"train_lr": 0.0011927321853634168, "train_min_lr": 0.0011927321853634168, "train_loss": 0.27242920564439815, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009044361053010782, "epoch": 911, "n_parameters": 303924416} {"train_lr": 0.001189844065535221, "train_min_lr": 0.001189844065535221, "train_loss": 0.27238541588676757, "train_loss_scale": 91162.2564102564, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 912, "n_parameters": 303924416} {"train_lr": 0.0011869571760631749, "train_min_lr": 0.0011869571760631749, "train_loss": 0.2724643994946606, "train_loss_scale": 58499.282051282054, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 913, "n_parameters": 303924416} {"train_lr": 0.0011840715282175822, "train_min_lr": 0.0011840715282175822, "train_loss": 0.27232375047587526, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00897650714390553, "epoch": 914, "n_parameters": 303924416} {"train_lr": 0.0011811871332638945, "train_min_lr": 0.0011811871332638945, "train_loss": 0.2725017120202, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009329363342541724, "epoch": 915, "n_parameters": 303924416} {"train_lr": 0.001178304002462676, "train_min_lr": 0.001178304002462676, "train_loss": 0.27235459316915905, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008946839905081268, "epoch": 916, "n_parameters": 303924416} {"train_lr": 0.0011754221470695527, "train_min_lr": 0.0011754221470695527, "train_loss": 0.272516679060526, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009535931450768542, "epoch": 917, "n_parameters": 303924416} {"train_lr": 0.0011725415783351723, "train_min_lr": 0.0011725415783351723, "train_loss": 0.2724149329737068, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009075560589106036, "epoch": 918, "n_parameters": 303924416} {"train_lr": 0.0011696623075051608, "train_min_lr": 0.0011696623075051608, "train_loss": 0.2723051245825795, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00908347609369323, "epoch": 919, "n_parameters": 303924416} {"train_lr": 0.0011667843458200756, "train_min_lr": 0.0011667843458200756, "train_loss": 0.27240689750462294, "train_loss_scale": 59129.4358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009167764613476511, "epoch": 920, "n_parameters": 303924416} {"train_lr": 0.001163907704515365, "train_min_lr": 0.001163907704515365, "train_loss": 0.2721900422221575, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.008965820659143038, "epoch": 921, "n_parameters": 303924416} {"train_lr": 0.001161032394821319, "train_min_lr": 0.001161032394821319, "train_loss": 0.2722280122644196, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009005717512291785, "epoch": 922, "n_parameters": 303924416} {"train_lr": 0.0011581584279630355, "train_min_lr": 0.0011581584279630355, "train_loss": 0.2721983566163824, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009011263188977655, "epoch": 923, "n_parameters": 303924416} {"train_lr": 0.0011552858151603633, "train_min_lr": 0.0011552858151603633, "train_loss": 0.27219237623592984, "train_loss_scale": 47576.61538461538, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 924, "n_parameters": 303924416} {"train_lr": 0.0011524145676278675, "train_min_lr": 0.0011524145676278675, "train_loss": 0.27221420978625804, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009142337023662642, "epoch": 925, "n_parameters": 303924416} {"train_lr": 0.001149544696574784, "train_min_lr": 0.001149544696574784, "train_loss": 0.27218116760158384, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009159888027426906, "epoch": 926, "n_parameters": 303924416} {"train_lr": 0.0011466762132049761, "train_min_lr": 0.0011466762132049761, "train_loss": 0.27220176955243236, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009138090419583023, "epoch": 927, "n_parameters": 303924416} {"train_lr": 0.0011438091287168863, "train_min_lr": 0.0011438091287168863, "train_loss": 0.27218151746246105, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009168991245902501, "epoch": 928, "n_parameters": 303924416} {"train_lr": 0.001140943454303497, "train_min_lr": 0.001140943454303497, "train_loss": 0.2721281651622401, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009098468440919159, "epoch": 929, "n_parameters": 303924416} {"train_lr": 0.001138079201152288, "train_min_lr": 0.001138079201152288, "train_loss": 0.2721251379668665, "train_loss_scale": 37284.10256410256, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00920427093306222, "epoch": 930, "n_parameters": 303924416} {"train_lr": 0.0011352163804451891, "train_min_lr": 0.0011352163804451891, "train_loss": 0.2720972263188555, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009109508031262802, "epoch": 931, "n_parameters": 303924416} {"train_lr": 0.0011323550033585377, "train_min_lr": 0.0011323550033585377, "train_loss": 0.2720781888406819, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009174864043672688, "epoch": 932, "n_parameters": 303924416} {"train_lr": 0.0011294950810630336, "train_min_lr": 0.0011294950810630336, "train_loss": 0.2720115934677709, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009066087674373427, "epoch": 933, "n_parameters": 303924416} {"train_lr": 0.001126636624723699, "train_min_lr": 0.001126636624723699, "train_loss": 0.2719795033335686, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00909890826099003, "epoch": 934, "n_parameters": 303924416} {"train_lr": 0.001123779645499835, "train_min_lr": 0.001123779645499835, "train_loss": 0.27195734646505654, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009060372953685239, "epoch": 935, "n_parameters": 303924416} {"train_lr": 0.0011209241545449753, "train_min_lr": 0.0011209241545449753, "train_loss": 0.27191541258556146, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00905507107754835, "epoch": 936, "n_parameters": 303924416} {"train_lr": 0.001118070163006838, "train_min_lr": 0.001118070163006838, "train_loss": 0.2719716376308591, "train_loss_scale": 78979.28205128205, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 937, "n_parameters": 303924416} {"train_lr": 0.0011152176820272937, "train_min_lr": 0.0011152176820272937, "train_loss": 0.27194335819699633, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009196798102213787, "epoch": 938, "n_parameters": 303924416} {"train_lr": 0.0011123667227423146, "train_min_lr": 0.0011123667227423146, "train_loss": 0.27193738010604507, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009224262333307892, "epoch": 939, "n_parameters": 303924416} {"train_lr": 0.0011095172962819328, "train_min_lr": 0.0011095172962819328, "train_loss": 0.27199020192253953, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009325333569461528, "epoch": 940, "n_parameters": 303924416} {"train_lr": 0.0011066694137701917, "train_min_lr": 0.0011066694137701917, "train_loss": 0.27194966482691085, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009334355383777084, "epoch": 941, "n_parameters": 303924416} {"train_lr": 0.001103823086325113, "train_min_lr": 0.001103823086325113, "train_loss": 0.27186667271113646, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009080248887841709, "epoch": 942, "n_parameters": 303924416} {"train_lr": 0.0011009783250586459, "train_min_lr": 0.0011009783250586459, "train_loss": 0.27184939668036234, "train_loss_scale": 66586.2564102564, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 943, "n_parameters": 303924416} {"train_lr": 0.001098135141076621, "train_min_lr": 0.001098135141076621, "train_loss": 0.271906309417831, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009286432389695292, "epoch": 944, "n_parameters": 303924416} {"train_lr": 0.0010952935454787188, "train_min_lr": 0.0010952935454787188, "train_loss": 0.2718395972564721, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009188390763786932, "epoch": 945, "n_parameters": 303924416} {"train_lr": 0.0010924535493584122, "train_min_lr": 0.0010924535493584122, "train_loss": 0.2718072546711669, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009150170264407419, "epoch": 946, "n_parameters": 303924416} {"train_lr": 0.0010896151638029327, "train_min_lr": 0.0010896151638029327, "train_loss": 0.27180640620645136, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009282728263105337, "epoch": 947, "n_parameters": 303924416} {"train_lr": 0.0010867783998932247, "train_min_lr": 0.0010867783998932247, "train_loss": 0.2717348625435709, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009144764668188797, "epoch": 948, "n_parameters": 303924416} {"train_lr": 0.0010839432687039, "train_min_lr": 0.0010839432687039, "train_loss": 0.27169754101524657, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009165368410042273, "epoch": 949, "n_parameters": 303924416} {"train_lr": 0.0010811097813031988, "train_min_lr": 0.0010811097813031988, "train_loss": 0.27169910897739613, "train_loss_scale": 110486.97435897436, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009205292970037613, "epoch": 950, "n_parameters": 303924416} {"train_lr": 0.0010782779487529402, "train_min_lr": 0.0010782779487529402, "train_loss": 0.27164784540983444, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009197962222596964, "epoch": 951, "n_parameters": 303924416} {"train_lr": 0.0010754477821084867, "train_min_lr": 0.0010754477821084867, "train_loss": 0.27164735115216804, "train_loss_scale": 123510.15384615384, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 952, "n_parameters": 303924416} {"train_lr": 0.0010726192924186942, "train_min_lr": 0.0010726192924186942, "train_loss": 0.27165396212456894, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009228738493477114, "epoch": 953, "n_parameters": 303924416} {"train_lr": 0.001069792490725876, "train_min_lr": 0.001069792490725876, "train_loss": 0.27167471680634964, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009296367870261654, "epoch": 954, "n_parameters": 303924416} {"train_lr": 0.0010669673880657497, "train_min_lr": 0.0010669673880657497, "train_loss": 0.2716791786981794, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009396302313782657, "epoch": 955, "n_parameters": 303924416} {"train_lr": 0.0010641439954674056, "train_min_lr": 0.0010641439954674056, "train_loss": 0.27167348450539297, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009391648420252098, "epoch": 956, "n_parameters": 303924416} {"train_lr": 0.0010613223239532518, "train_min_lr": 0.0010613223239532518, "train_loss": 0.2715821857832802, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009250637979652638, "epoch": 957, "n_parameters": 303924416} {"train_lr": 0.001058502384538984, "train_min_lr": 0.001058502384538984, "train_loss": 0.27163933267673623, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009475062102771914, "epoch": 958, "n_parameters": 303924416} {"train_lr": 0.0010556841882335324, "train_min_lr": 0.0010556841882335324, "train_loss": 0.2716813863834175, "train_loss_scale": 100194.46153846153, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 959, "n_parameters": 303924416} {"train_lr": 0.0010528677460390219, "train_min_lr": 0.0010528677460390219, "train_loss": 0.27162594701617193, "train_loss_scale": 60284.717948717946, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 960, "n_parameters": 303924416} {"train_lr": 0.001050053068950731, "train_min_lr": 0.001050053068950731, "train_loss": 0.2715342025487469, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009227767195671989, "epoch": 961, "n_parameters": 303924416} {"train_lr": 0.0010472401679570446, "train_min_lr": 0.0010472401679570446, "train_loss": 0.27156807944918865, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00938985194079578, "epoch": 962, "n_parameters": 303924416} {"train_lr": 0.0010444290540394176, "train_min_lr": 0.0010444290540394176, "train_loss": 0.2715422273344862, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00934715289921046, "epoch": 963, "n_parameters": 303924416} {"train_lr": 0.0010416197381723248, "train_min_lr": 0.0010416197381723248, "train_loss": 0.27153415952880794, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009448938409033686, "epoch": 964, "n_parameters": 303924416} {"train_lr": 0.001038812231323222, "train_min_lr": 0.001038812231323222, "train_loss": 0.27146495907949525, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009313529234928772, "epoch": 965, "n_parameters": 303924416} {"train_lr": 0.0010360065444525053, "train_min_lr": 0.0010360065444525053, "train_loss": 0.2714754734337569, "train_loss_scale": 16909.128205128207, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 966, "n_parameters": 303924416} {"train_lr": 0.0010332026885134641, "train_min_lr": 0.0010332026885134641, "train_loss": 0.2714655021718966, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009395248847655378, "epoch": 967, "n_parameters": 303924416} {"train_lr": 0.0010304006744522387, "train_min_lr": 0.0010304006744522387, "train_loss": 0.27152442815713584, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00947818260353345, "epoch": 968, "n_parameters": 303924416} {"train_lr": 0.00102760051320778, "train_min_lr": 0.00102760051320778, "train_loss": 0.27141713069525, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009278321662583413, "epoch": 969, "n_parameters": 303924416} {"train_lr": 0.0010248022157118056, "train_min_lr": 0.0010248022157118056, "train_loss": 0.2714228428667411, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009544580933064796, "epoch": 970, "n_parameters": 303924416} {"train_lr": 0.0010220057928887576, "train_min_lr": 0.0010220057928887576, "train_loss": 0.27142565035058236, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00953321611114706, "epoch": 971, "n_parameters": 303924416} {"train_lr": 0.001019211255655757, "train_min_lr": 0.001019211255655757, "train_loss": 0.27138278583207953, "train_loss_scale": 25521.23076923077, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009529343271890704, "epoch": 972, "n_parameters": 303924416} {"train_lr": 0.0010164186149225658, "train_min_lr": 0.0010164186149225658, "train_loss": 0.27145950811413616, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009688576784892341, "epoch": 973, "n_parameters": 303924416} {"train_lr": 0.0010136278815915433, "train_min_lr": 0.0010136278815915433, "train_loss": 0.27148367991097844, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009723471405987557, "epoch": 974, "n_parameters": 303924416} {"train_lr": 0.0010108390665575985, "train_min_lr": 0.0010108390665575985, "train_loss": 0.2714674198277629, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009671266778754309, "epoch": 975, "n_parameters": 303924416} {"train_lr": 0.0010080521807081556, "train_min_lr": 0.0010080521807081556, "train_loss": 0.27129220640740526, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009428179793012066, "epoch": 976, "n_parameters": 303924416} {"train_lr": 0.0010052672349231044, "train_min_lr": 0.0010052672349231044, "train_loss": 0.27134734093665314, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009686382872482331, "epoch": 977, "n_parameters": 303924416} {"train_lr": 0.001002484240074762, "train_min_lr": 0.001002484240074762, "train_loss": 0.2714690423707884, "train_loss_scale": 37599.179487179485, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009740059081918728, "epoch": 978, "n_parameters": 303924416} {"train_lr": 0.0009997032070278265, "train_min_lr": 0.0009997032070278265, "train_loss": 0.2713129391583304, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009552189205677655, "epoch": 979, "n_parameters": 303924416} {"train_lr": 0.000996924146639344, "train_min_lr": 0.000996924146639344, "train_loss": 0.27132644411474943, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009607884734391402, "epoch": 980, "n_parameters": 303924416} {"train_lr": 0.0009941470697586525, "train_min_lr": 0.0009941470697586525, "train_loss": 0.2714225447897871, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010037145380360575, "epoch": 981, "n_parameters": 303924416} {"train_lr": 0.0009913719872273496, "train_min_lr": 0.0009913719872273496, "train_loss": 0.27147075664311743, "train_loss_scale": 41695.179487179485, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 982, "n_parameters": 303924416} {"train_lr": 0.000988598909879245, "train_min_lr": 0.000988598909879245, "train_loss": 0.2713468755201365, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00966127402184961, "epoch": 983, "n_parameters": 303924416} {"train_lr": 0.0009858278485403227, "train_min_lr": 0.0009858278485403227, "train_loss": 0.2712975048030225, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009691901510963455, "epoch": 984, "n_parameters": 303924416} {"train_lr": 0.000983058814028695, "train_min_lr": 0.000983058814028695, "train_loss": 0.27130176600080746, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009852111390314233, "epoch": 985, "n_parameters": 303924416} {"train_lr": 0.0009802918171545627, "train_min_lr": 0.0009802918171545627, "train_loss": 0.2713554790810658, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009751982360075299, "epoch": 986, "n_parameters": 303924416} {"train_lr": 0.0009775268687201692, "train_min_lr": 0.0009775268687201692, "train_loss": 0.2712826763357346, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009689213576702735, "epoch": 987, "n_parameters": 303924416} {"train_lr": 0.0009747639795197641, "train_min_lr": 0.0009747639795197641, "train_loss": 0.2712551942274261, "train_loss_scale": 40119.794871794875, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 988, "n_parameters": 303924416} {"train_lr": 0.0009720031603395551, "train_min_lr": 0.0009720031603395551, "train_loss": 0.27129591680848253, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009902681952390151, "epoch": 989, "n_parameters": 303924416} {"train_lr": 0.0009692444219576709, "train_min_lr": 0.0009692444219576709, "train_loss": 0.27124171283167714, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00967891343558828, "epoch": 990, "n_parameters": 303924416} {"train_lr": 0.0009664877751441156, "train_min_lr": 0.0009664877751441156, "train_loss": 0.2711967436704211, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009725967953459192, "epoch": 991, "n_parameters": 303924416} {"train_lr": 0.0009637332306607262, "train_min_lr": 0.0009637332306607262, "train_loss": 0.27112628440432346, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009573963617022412, "epoch": 992, "n_parameters": 303924416} {"train_lr": 0.0009609807992611344, "train_min_lr": 0.0009609807992611344, "train_loss": 0.27132234515216297, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010254059440623492, "epoch": 993, "n_parameters": 303924416} {"train_lr": 0.0009582304916907242, "train_min_lr": 0.0009582304916907242, "train_loss": 0.2711903896445456, "train_loss_scale": 30562.46153846154, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 994, "n_parameters": 303924416} {"train_lr": 0.0009554823186865848, "train_min_lr": 0.0009554823186865848, "train_loss": 0.27117205867603517, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009951534767587407, "epoch": 995, "n_parameters": 303924416} {"train_lr": 0.0009527362909774747, "train_min_lr": 0.0009527362909774747, "train_loss": 0.2712299197954006, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009917437525776526, "epoch": 996, "n_parameters": 303924416} {"train_lr": 0.0009499924192837747, "train_min_lr": 0.0009499924192837747, "train_loss": 0.2711277546164078, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009721519965797855, "epoch": 997, "n_parameters": 303924416} {"train_lr": 0.0009472507143174505, "train_min_lr": 0.0009472507143174505, "train_loss": 0.2710683155321301, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00964360474011837, "epoch": 998, "n_parameters": 303924416} {"train_lr": 0.0009445111867820084, "train_min_lr": 0.0009445111867820084, "train_loss": 0.2710217721790123, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0097330179817688, "epoch": 999, "n_parameters": 303924416} {"train_lr": 0.0009417738473724552, "train_min_lr": 0.0009417738473724552, "train_loss": 0.2710059460801765, "train_loss_scale": 9951.179487179486, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1000, "n_parameters": 303924416} {"train_lr": 0.0009390387067752538, "train_min_lr": 0.0009390387067752538, "train_loss": 0.2711889720103966, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010213952099617857, "epoch": 1001, "n_parameters": 303924416} {"train_lr": 0.000936305775668283, "train_min_lr": 0.000936305775668283, "train_loss": 0.2710424324957272, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00980993274014252, "epoch": 1002, "n_parameters": 303924416} {"train_lr": 0.0009335750647207968, "train_min_lr": 0.0009335750647207968, "train_loss": 0.2711209724418437, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010106976490276746, "epoch": 1003, "n_parameters": 303924416} {"train_lr": 0.0009308465845933817, "train_min_lr": 0.0009308465845933817, "train_loss": 0.27115282609772223, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010183839822928302, "epoch": 1004, "n_parameters": 303924416} {"train_lr": 0.0009281203459379158, "train_min_lr": 0.0009281203459379158, "train_loss": 0.27097845261988157, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009753438984402098, "epoch": 1005, "n_parameters": 303924416} {"train_lr": 0.0009253963593975229, "train_min_lr": 0.0009253963593975229, "train_loss": 0.27104668453849184, "train_loss_scale": 11264.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009940477658230334, "epoch": 1006, "n_parameters": 303924416} {"train_lr": 0.0009226746356065399, "train_min_lr": 0.0009226746356065399, "train_loss": 0.2709041464333542, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009798023593015013, "epoch": 1007, "n_parameters": 303924416} {"train_lr": 0.0009199551851904667, "train_min_lr": 0.0009199551851904667, "train_loss": 0.2709586301143878, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009927922943368172, "epoch": 1008, "n_parameters": 303924416} {"train_lr": 0.0009172380187659294, "train_min_lr": 0.0009172380187659294, "train_loss": 0.2709169558350904, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009886154737801123, "epoch": 1009, "n_parameters": 303924416} {"train_lr": 0.000914523146940636, "train_min_lr": 0.000914523146940636, "train_loss": 0.2707933413867767, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009804049166492544, "epoch": 1010, "n_parameters": 303924416} {"train_lr": 0.0009118105803133375, "train_min_lr": 0.0009118105803133375, "train_loss": 0.270846650768549, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009872670783900106, "epoch": 1011, "n_parameters": 303924416} {"train_lr": 0.000909100329473786, "train_min_lr": 0.000909100329473786, "train_loss": 0.27080924480926627, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009849730335796872, "epoch": 1012, "n_parameters": 303924416} {"train_lr": 0.0009063924050026917, "train_min_lr": 0.0009063924050026917, "train_loss": 0.27082164768272865, "train_loss_scale": 32190.358974358973, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009939974185843498, "epoch": 1013, "n_parameters": 303924416} {"train_lr": 0.0009036868174716841, "train_min_lr": 0.0009036868174716841, "train_loss": 0.2707714997918512, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009891418158076704, "epoch": 1014, "n_parameters": 303924416} {"train_lr": 0.0009009835774432676, "train_min_lr": 0.0009009835774432676, "train_loss": 0.2707919735873405, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009982438572945122, "epoch": 1015, "n_parameters": 303924416} {"train_lr": 0.000898282695470784, "train_min_lr": 0.000898282695470784, "train_loss": 0.2707392168111908, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009863666569193205, "epoch": 1016, "n_parameters": 303924416} {"train_lr": 0.0008955841820983682, "train_min_lr": 0.0008955841820983682, "train_loss": 0.2707836599871277, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010083933501170041, "epoch": 1017, "n_parameters": 303924416} {"train_lr": 0.0008928880478609084, "train_min_lr": 0.0008928880478609084, "train_loss": 0.2707022232367681, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009912284920947293, "epoch": 1018, "n_parameters": 303924416} {"train_lr": 0.000890194303284004, "train_min_lr": 0.000890194303284004, "train_loss": 0.2707589441993966, "train_loss_scale": 50937.4358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009981519769495115, "epoch": 1019, "n_parameters": 303924416} {"train_lr": 0.0008875029588839267, "train_min_lr": 0.0008875029588839267, "train_loss": 0.2706388184914174, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009920012510309998, "epoch": 1020, "n_parameters": 303924416} {"train_lr": 0.0008848140251675762, "train_min_lr": 0.0008848140251675762, "train_loss": 0.2706551041442137, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009990072042012636, "epoch": 1021, "n_parameters": 303924416} {"train_lr": 0.0008821275126324441, "train_min_lr": 0.0008821275126324441, "train_loss": 0.2707403000408354, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010267985020525372, "epoch": 1022, "n_parameters": 303924416} {"train_lr": 0.0008794434317665664, "train_min_lr": 0.0008794434317665664, "train_loss": 0.2706095652732377, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.00990376634021791, "epoch": 1023, "n_parameters": 303924416} {"train_lr": 0.0008767617930484874, "train_min_lr": 0.0008767617930484874, "train_loss": 0.27059500025680816, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009947940847502114, "epoch": 1024, "n_parameters": 303924416} {"train_lr": 0.000874082606947218, "train_min_lr": 0.000874082606947218, "train_loss": 0.2705620580460303, "train_loss_scale": 74988.30769230769, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009950312266412836, "epoch": 1025, "n_parameters": 303924416} {"train_lr": 0.0008714058839221914, "train_min_lr": 0.0008714058839221914, "train_loss": 0.27059387338037294, "train_loss_scale": 90112.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1026, "n_parameters": 303924416} {"train_lr": 0.0008687316344232313, "train_min_lr": 0.0008687316344232313, "train_loss": 0.27055024155654395, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010044980964336831, "epoch": 1027, "n_parameters": 303924416} {"train_lr": 0.0008660598688904959, "train_min_lr": 0.0008660598688904959, "train_loss": 0.2705584106435522, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010014470744257173, "epoch": 1028, "n_parameters": 303924416} {"train_lr": 0.0008633905977544545, "train_min_lr": 0.0008633905977544545, "train_loss": 0.27051322430503577, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009955498643625433, "epoch": 1029, "n_parameters": 303924416} {"train_lr": 0.0008607238314358315, "train_min_lr": 0.0008607238314358315, "train_loss": 0.2704094331024979, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009896625788548054, "epoch": 1030, "n_parameters": 303924416} {"train_lr": 0.0008580595803455788, "train_min_lr": 0.0008580595803455788, "train_loss": 0.27051235393931466, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010077168765024114, "epoch": 1031, "n_parameters": 303924416} {"train_lr": 0.0008553978548848254, "train_min_lr": 0.0008553978548848254, "train_loss": 0.2705106746363573, "train_loss_scale": 65746.05128205128, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1032, "n_parameters": 303924416} {"train_lr": 0.0008527386654448397, "train_min_lr": 0.0008527386654448397, "train_loss": 0.27058726480194867, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010503884948169192, "epoch": 1033, "n_parameters": 303924416} {"train_lr": 0.0008500820224069921, "train_min_lr": 0.0008500820224069921, "train_loss": 0.27040063629213434, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009987788048023596, "epoch": 1034, "n_parameters": 303924416} {"train_lr": 0.0008474279361427082, "train_min_lr": 0.0008474279361427082, "train_loss": 0.27041526502058005, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010083274085982105, "epoch": 1035, "n_parameters": 303924416} {"train_lr": 0.0008447764170134383, "train_min_lr": 0.0008447764170134383, "train_loss": 0.2703632124795172, "train_loss_scale": 64170.666666666664, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1036, "n_parameters": 303924416} {"train_lr": 0.0008421274753706043, "train_min_lr": 0.0008421274753706043, "train_loss": 0.2703042330059151, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.009974857938523667, "epoch": 1037, "n_parameters": 303924416} {"train_lr": 0.0008394811215555701, "train_min_lr": 0.0008394811215555701, "train_loss": 0.2703611626355455, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010248686930236336, "epoch": 1038, "n_parameters": 303924416} {"train_lr": 0.000836837365899592, "train_min_lr": 0.000836837365899592, "train_loss": 0.27035514997222865, "train_loss_scale": 20742.5641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1039, "n_parameters": 303924416} {"train_lr": 0.0008315576903390954, "train_min_lr": 0.0008315576903390954, "train_loss": 0.27038761388617927, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010250081323111096, "epoch": 1041, "n_parameters": 303924416} {"train_lr": 0.000828921791046216, "train_min_lr": 0.000828921791046216, "train_loss": 0.27023046997745925, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01007766070930908, "epoch": 1042, "n_parameters": 303924416} {"train_lr": 0.0008262885311355988, "train_min_lr": 0.0008262885311355988, "train_loss": 0.27026809085136616, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010159156306121403, "epoch": 1043, "n_parameters": 303924416} {"train_lr": 0.0008236579208873839, "train_min_lr": 0.0008236579208873839, "train_loss": 0.27026347598084843, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010454302020848561, "epoch": 1044, "n_parameters": 303924416} {"train_lr": 0.00082102997057137, "train_min_lr": 0.00082102997057137, "train_loss": 0.27030434066620773, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010336515636971364, "epoch": 1045, "n_parameters": 303924416} {"train_lr": 0.0008184046904469691, "train_min_lr": 0.0008184046904469691, "train_loss": 0.270228749830634, "train_loss_scale": 21687.79487179487, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010214597485267008, "epoch": 1046, "n_parameters": 303924416} {"train_lr": 0.0008157820907631688, "train_min_lr": 0.0008157820907631688, "train_loss": 0.2702207338088789, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010359842237682106, "epoch": 1047, "n_parameters": 303924416} {"train_lr": 0.0008131621817584946, "train_min_lr": 0.0008131621817584946, "train_loss": 0.2703077497605521, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010548870335930051, "epoch": 1048, "n_parameters": 303924416} {"train_lr": 0.000810544973660965, "train_min_lr": 0.000810544973660965, "train_loss": 0.27025642678237116, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010365263561909206, "epoch": 1049, "n_parameters": 303924416} {"train_lr": 0.0008079304766880569, "train_min_lr": 0.0008079304766880569, "train_loss": 0.27019589795814586, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010221697718430406, "epoch": 1050, "n_parameters": 303924416} {"train_lr": 0.0008053187010466622, "train_min_lr": 0.0008053187010466622, "train_loss": 0.27012060329807586, "train_loss_scale": 17119.17948717949, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1051, "n_parameters": 303924416} {"train_lr": 0.0008027096569330467, "train_min_lr": 0.0008027096569330467, "train_loss": 0.27018728954956317, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010568809901507428, "epoch": 1052, "n_parameters": 303924416} {"train_lr": 0.0008001033545328149, "train_min_lr": 0.0008001033545328149, "train_loss": 0.27014036853021633, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010471179687346403, "epoch": 1053, "n_parameters": 303924416} {"train_lr": 0.0007974998040208676, "train_min_lr": 0.0007974998040208676, "train_loss": 0.27010472449593437, "train_loss_scale": 13206.97435897436, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1054, "n_parameters": 303924416} {"train_lr": 0.0007948990155613614, "train_min_lr": 0.0007948990155613614, "train_loss": 0.27010331474220717, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010288348666225106, "epoch": 1055, "n_parameters": 303924416} {"train_lr": 0.0007923009993076708, "train_min_lr": 0.0007923009993076708, "train_loss": 0.2700497434468558, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010355755680789933, "epoch": 1056, "n_parameters": 303924416} {"train_lr": 0.0007897057654023456, "train_min_lr": 0.0007897057654023456, "train_loss": 0.27003508566830975, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010386534357586732, "epoch": 1057, "n_parameters": 303924416} {"train_lr": 0.0007871133239770778, "train_min_lr": 0.0007871133239770778, "train_loss": 0.27004714065697044, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010525340605407761, "epoch": 1058, "n_parameters": 303924416} {"train_lr": 0.0007845236851526527, "train_min_lr": 0.0007845236851526527, "train_loss": 0.2700544646403824, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010606333159674436, "epoch": 1059, "n_parameters": 303924416} {"train_lr": 0.0007819368590389165, "train_min_lr": 0.0007819368590389165, "train_loss": 0.27005995800479865, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010558034782894911, "epoch": 1060, "n_parameters": 303924416} {"train_lr": 0.0007793528557347355, "train_min_lr": 0.0007793528557347355, "train_loss": 0.27002325281202316, "train_loss_scale": 16200.205128205129, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01054629512453595, "epoch": 1061, "n_parameters": 303924416} {"train_lr": 0.000776771685327956, "train_min_lr": 0.000776771685327956, "train_loss": 0.26998781042698866, "train_loss_scale": 12918.153846153846, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1062, "n_parameters": 303924416} {"train_lr": 0.0007741933578953627, "train_min_lr": 0.0007741933578953627, "train_loss": 0.2699335760687693, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010353791151339045, "epoch": 1063, "n_parameters": 303924416} {"train_lr": 0.0007716178835026435, "train_min_lr": 0.0007716178835026435, "train_loss": 0.2699709190533329, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010610867515564538, "epoch": 1064, "n_parameters": 303924416} {"train_lr": 0.0007690452722043463, "train_min_lr": 0.0007690452722043463, "train_loss": 0.26999719250195015, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010667277595553642, "epoch": 1065, "n_parameters": 303924416} {"train_lr": 0.000766475534043844, "train_min_lr": 0.000766475534043844, "train_loss": 0.26985506078777594, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010351026215805458, "epoch": 1066, "n_parameters": 303924416} {"train_lr": 0.0007639086790532912, "train_min_lr": 0.0007639086790532912, "train_loss": 0.26990737378274876, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01058790899025133, "epoch": 1067, "n_parameters": 303924416} {"train_lr": 0.0007613447172535847, "train_min_lr": 0.0007613447172535847, "train_loss": 0.2700319993160426, "train_loss_scale": 8297.02564102564, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010903902605581934, "epoch": 1068, "n_parameters": 303924416} {"train_lr": 0.0007587836586543333, "train_min_lr": 0.0007587836586543333, "train_loss": 0.2698839027643538, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01045106349286074, "epoch": 1069, "n_parameters": 303924416} {"train_lr": 0.0007562255132538018, "train_min_lr": 0.0007562255132538018, "train_loss": 0.26984509339150137, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010701583101390263, "epoch": 1070, "n_parameters": 303924416} {"train_lr": 0.000753670291038892, "train_min_lr": 0.000753670291038892, "train_loss": 0.2698280376483662, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010559330619752215, "epoch": 1071, "n_parameters": 303924416} {"train_lr": 0.0007511180019850862, "train_min_lr": 0.0007511180019850862, "train_loss": 0.26969086876115167, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01037787416508087, "epoch": 1072, "n_parameters": 303924416} {"train_lr": 0.0007485686560564195, "train_min_lr": 0.0007485686560564195, "train_loss": 0.2697684684386238, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010634165182590293, "epoch": 1073, "n_parameters": 303924416} {"train_lr": 0.0007460222632054375, "train_min_lr": 0.0007460222632054375, "train_loss": 0.26970516747902507, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010619265173609631, "epoch": 1074, "n_parameters": 303924416} {"train_lr": 0.0007434788333731559, "train_min_lr": 0.0007434788333731559, "train_loss": 0.26969990907356334, "train_loss_scale": 26256.410256410258, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010550776809955446, "epoch": 1075, "n_parameters": 303924416} {"train_lr": 0.0007409383764890203, "train_min_lr": 0.0007409383764890203, "train_loss": 0.26969178861532456, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010675560718832107, "epoch": 1076, "n_parameters": 303924416} {"train_lr": 0.0007384009024708765, "train_min_lr": 0.0007384009024708765, "train_loss": 0.26964811366409636, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010450129239008022, "epoch": 1077, "n_parameters": 303924416} {"train_lr": 0.000735866421224917, "train_min_lr": 0.000735866421224917, "train_loss": 0.2696367262964113, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01070986120388485, "epoch": 1078, "n_parameters": 303924416} {"train_lr": 0.0007333349426456595, "train_min_lr": 0.0007333349426456595, "train_loss": 0.2696944894269109, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01089066102539595, "epoch": 1079, "n_parameters": 303924416} {"train_lr": 0.0007308064766158923, "train_min_lr": 0.0007308064766158923, "train_loss": 0.2697394323260643, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010912505085938252, "epoch": 1080, "n_parameters": 303924416} {"train_lr": 0.0007282810330066472, "train_min_lr": 0.0007282810330066472, "train_loss": 0.26965789867636675, "train_loss_scale": 39069.53846153846, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010664265244626082, "epoch": 1081, "n_parameters": 303924416} {"train_lr": 0.0007257586216771538, "train_min_lr": 0.0007257586216771538, "train_loss": 0.2696712630030771, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01076310821987975, "epoch": 1082, "n_parameters": 303924416} {"train_lr": 0.0007232392524748043, "train_min_lr": 0.0007232392524748043, "train_loss": 0.2695316548178641, "train_loss_scale": 57869.1282051282, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1083, "n_parameters": 303924416} {"train_lr": 0.0007207229352351171, "train_min_lr": 0.0007207229352351171, "train_loss": 0.26953622645949227, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010664504364682123, "epoch": 1084, "n_parameters": 303924416} {"train_lr": 0.0007182096797816934, "train_min_lr": 0.0007182096797816934, "train_loss": 0.2695833078185574, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010877272985780086, "epoch": 1085, "n_parameters": 303924416} {"train_lr": 0.0007156994959261803, "train_min_lr": 0.0007156994959261803, "train_loss": 0.26967001064584994, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011246056776517669, "epoch": 1086, "n_parameters": 303924416} {"train_lr": 0.0007131923934682372, "train_min_lr": 0.0007131923934682372, "train_loss": 0.2695803850238474, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010729293356864499, "epoch": 1087, "n_parameters": 303924416} {"train_lr": 0.0007106883821954903, "train_min_lr": 0.0007106883821954903, "train_loss": 0.2695528810384134, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010767768349092549, "epoch": 1088, "n_parameters": 303924416} {"train_lr": 0.0007081874718835006, "train_min_lr": 0.0007081874718835006, "train_loss": 0.2696073255770338, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010955300486574952, "epoch": 1089, "n_parameters": 303924416} {"train_lr": 0.0007056896722957201, "train_min_lr": 0.0007056896722957201, "train_loss": 0.2695028386048925, "train_loss_scale": 59759.58974358974, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010750204441137612, "epoch": 1090, "n_parameters": 303924416} {"train_lr": 0.0007031949931834597, "train_min_lr": 0.0007031949931834597, "train_loss": 0.269406587148133, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010709565386582071, "epoch": 1091, "n_parameters": 303924416} {"train_lr": 0.0007007034442858478, "train_min_lr": 0.0007007034442858478, "train_loss": 0.269490345646866, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010971129570419017, "epoch": 1092, "n_parameters": 303924416} {"train_lr": 0.0006982150353297889, "train_min_lr": 0.0006982150353297889, "train_loss": 0.2694431717734402, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010799877850028375, "epoch": 1093, "n_parameters": 303924416} {"train_lr": 0.0006957297760299335, "train_min_lr": 0.0006957297760299335, "train_loss": 0.2693810286597373, "train_loss_scale": 38754.46153846154, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1094, "n_parameters": 303924416} {"train_lr": 0.0006932476760886348, "train_min_lr": 0.0006932476760886348, "train_loss": 0.2693854237321573, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010949823703879539, "epoch": 1095, "n_parameters": 303924416} {"train_lr": 0.0006907687451959105, "train_min_lr": 0.0006907687451959105, "train_loss": 0.2693342457089621, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010797633225313174, "epoch": 1096, "n_parameters": 303924416} {"train_lr": 0.0006882929930294079, "train_min_lr": 0.0006882929930294079, "train_loss": 0.269355470326562, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01086748288705563, "epoch": 1097, "n_parameters": 303924416} {"train_lr": 0.0006858204292543649, "train_min_lr": 0.0006858204292543649, "train_loss": 0.26932226116052616, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010928906073483327, "epoch": 1098, "n_parameters": 303924416} {"train_lr": 0.0006833510635235713, "train_min_lr": 0.0006833510635235713, "train_loss": 0.26926071253509665, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010734456131616846, "epoch": 1099, "n_parameters": 303924416} {"train_lr": 0.0006808849054773309, "train_min_lr": 0.0006808849054773309, "train_loss": 0.2692115622638271, "train_loss_scale": 34763.48717948718, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1100, "n_parameters": 303924416} {"train_lr": 0.0006784219647434278, "train_min_lr": 0.0006784219647434278, "train_loss": 0.26920568665716416, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010915053521808332, "epoch": 1101, "n_parameters": 303924416} {"train_lr": 0.0006759622509370837, "train_min_lr": 0.0006759622509370837, "train_loss": 0.2691902283561201, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.010916675049931003, "epoch": 1102, "n_parameters": 303924416} {"train_lr": 0.0006735057736609214, "train_min_lr": 0.0006735057736609214, "train_loss": 0.26926175394477564, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011240462202411622, "epoch": 1103, "n_parameters": 303924416} {"train_lr": 0.0006710525425049303, "train_min_lr": 0.0006710525425049303, "train_loss": 0.26925791423612583, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011052524154552091, "epoch": 1104, "n_parameters": 303924416} {"train_lr": 0.0006686025670464282, "train_min_lr": 0.0006686025670464282, "train_loss": 0.2693322899315554, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011401565145486249, "epoch": 1105, "n_parameters": 303924416} {"train_lr": 0.0006661558568500193, "train_min_lr": 0.0006661558568500193, "train_loss": 0.26926907673119926, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011274562721761564, "epoch": 1106, "n_parameters": 303924416} {"train_lr": 0.0006637124214675638, "train_min_lr": 0.0006637124214675638, "train_loss": 0.26940599317626596, "train_loss_scale": 63435.48717948718, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011706912806496406, "epoch": 1107, "n_parameters": 303924416} {"train_lr": 0.0006612722704381341, "train_min_lr": 0.0006612722704381341, "train_loss": 0.2692844840184523, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011233085823149826, "epoch": 1108, "n_parameters": 303924416} {"train_lr": 0.000658835413287983, "train_min_lr": 0.000658835413287983, "train_loss": 0.2692546571951168, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011331766449774686, "epoch": 1109, "n_parameters": 303924416} {"train_lr": 0.0006564018595305038, "train_min_lr": 0.0006564018595305038, "train_loss": 0.26917889885580504, "train_loss_scale": 35708.717948717946, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1110, "n_parameters": 303924416} {"train_lr": 0.0006539716186661912, "train_min_lr": 0.0006539716186661912, "train_loss": 0.26924631687012524, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011340387404347077, "epoch": 1111, "n_parameters": 303924416} {"train_lr": 0.0006515447001826097, "train_min_lr": 0.0006515447001826097, "train_loss": 0.26913784815070146, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011094161951078627, "epoch": 1112, "n_parameters": 303924416} {"train_lr": 0.000649121113554352, "train_min_lr": 0.000649121113554352, "train_loss": 0.2690875974024097, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011138731225704154, "epoch": 1113, "n_parameters": 303924416} {"train_lr": 0.0006467008682430024, "train_min_lr": 0.0006467008682430024, "train_loss": 0.26907897976716644, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011159648491929356, "epoch": 1114, "n_parameters": 303924416} {"train_lr": 0.0006442839736971013, "train_min_lr": 0.0006442839736971013, "train_loss": 0.2690036877368887, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01119176967917249, "epoch": 1115, "n_parameters": 303924416} {"train_lr": 0.0006418704393521103, "train_min_lr": 0.0006418704393521103, "train_loss": 0.2690022212650197, "train_loss_scale": 49152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011204819908389488, "epoch": 1116, "n_parameters": 303924416} {"train_lr": 0.0006394602746303688, "train_min_lr": 0.0006394602746303688, "train_loss": 0.26900831933240765, "train_loss_scale": 39279.58974358974, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1117, "n_parameters": 303924416} {"train_lr": 0.0006370534889410649, "train_min_lr": 0.0006370534889410649, "train_loss": 0.2689413563139402, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011097089324791271, "epoch": 1118, "n_parameters": 303924416} {"train_lr": 0.0006346500916801923, "train_min_lr": 0.0006346500916801923, "train_loss": 0.2689041705796113, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011118938331492245, "epoch": 1119, "n_parameters": 303924416} {"train_lr": 0.0006298534999615448, "train_min_lr": 0.0006298534999615448, "train_loss": 0.2688997525226277, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011235806114899997, "epoch": 1121, "n_parameters": 303924416} {"train_lr": 0.0006274603242294721, "train_min_lr": 0.0006274603242294721, "train_loss": 0.2689996985688758, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011686700626085393, "epoch": 1122, "n_parameters": 303924416} {"train_lr": 0.0006250705743771617, "train_min_lr": 0.0006250705743771617, "train_loss": 0.26882375440357303, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011121839359522058, "epoch": 1123, "n_parameters": 303924416} {"train_lr": 0.000622684259734102, "train_min_lr": 0.000622684259734102, "train_loss": 0.26881910165927064, "train_loss_scale": 45581.1282051282, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011182149910988906, "epoch": 1124, "n_parameters": 303924416} {"train_lr": 0.0006203013896163704, "train_min_lr": 0.0006203013896163704, "train_loss": 0.2687930314658353, "train_loss_scale": 35918.769230769234, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1125, "n_parameters": 303924416} {"train_lr": 0.0006179219733265951, "train_min_lr": 0.0006179219733265951, "train_loss": 0.26880971583215374, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01126081578564854, "epoch": 1126, "n_parameters": 303924416} {"train_lr": 0.0006155460201539221, "train_min_lr": 0.0006155460201539221, "train_loss": 0.2688118311672065, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011566819888778413, "epoch": 1127, "n_parameters": 303924416} {"train_lr": 0.0006131735393739788, "train_min_lr": 0.0006131735393739788, "train_loss": 0.2688723001181363, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01153231892758646, "epoch": 1128, "n_parameters": 303924416} {"train_lr": 0.0006108045402488355, "train_min_lr": 0.0006108045402488355, "train_loss": 0.26889687752685487, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011734627088746773, "epoch": 1129, "n_parameters": 303924416} {"train_lr": 0.000608439032026968, "train_min_lr": 0.000608439032026968, "train_loss": 0.2688660311620109, "train_loss_scale": 27674.25641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1130, "n_parameters": 303924416} {"train_lr": 0.0006060770239432257, "train_min_lr": 0.0006060770239432257, "train_loss": 0.2690228328544599, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012193537129160877, "epoch": 1131, "n_parameters": 303924416} {"train_lr": 0.0006037185252187955, "train_min_lr": 0.0006037185252187955, "train_loss": 0.26902190775562745, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012067999248989882, "epoch": 1132, "n_parameters": 303924416} {"train_lr": 0.0006013635450611617, "train_min_lr": 0.0006013635450611617, "train_loss": 0.26886526378205955, "train_loss_scale": 15753.846153846154, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1133, "n_parameters": 303924416} {"train_lr": 0.0005990120926640702, "train_min_lr": 0.0005990120926640702, "train_loss": 0.2689895704119968, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012295059259765996, "epoch": 1134, "n_parameters": 303924416} {"train_lr": 0.0005966641772074993, "train_min_lr": 0.0005966641772074993, "train_loss": 0.26892369979013425, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011786102797859946, "epoch": 1135, "n_parameters": 303924416} {"train_lr": 0.0005943198078576163, "train_min_lr": 0.0005943198078576163, "train_loss": 0.2687391191452312, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01148550655059994, "epoch": 1136, "n_parameters": 303924416} {"train_lr": 0.0005919789937667458, "train_min_lr": 0.0005919789937667458, "train_loss": 0.268691162363841, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011376608795940112, "epoch": 1137, "n_parameters": 303924416} {"train_lr": 0.0005896417440733318, "train_min_lr": 0.0005896417440733318, "train_loss": 0.268770096047471, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012016978255138757, "epoch": 1138, "n_parameters": 303924416} {"train_lr": 0.0005873080679019029, "train_min_lr": 0.0005873080679019029, "train_loss": 0.26868934692062724, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011618443212519662, "epoch": 1139, "n_parameters": 303924416} {"train_lr": 0.0005849779743630389, "train_min_lr": 0.0005849779743630389, "train_loss": 0.2686571376278805, "train_loss_scale": 13653.333333333334, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0117363064424493, "epoch": 1140, "n_parameters": 303924416} {"train_lr": 0.00058265147255333, "train_min_lr": 0.00058265147255333, "train_loss": 0.26867773382363314, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01187565607818751, "epoch": 1141, "n_parameters": 303924416} {"train_lr": 0.0005803285715553476, "train_min_lr": 0.0005803285715553476, "train_loss": 0.268582371627697, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011473730242309662, "epoch": 1142, "n_parameters": 303924416} {"train_lr": 0.0005780092804376041, "train_min_lr": 0.0005780092804376041, "train_loss": 0.26857481702851754, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01165615868432304, "epoch": 1143, "n_parameters": 303924416} {"train_lr": 0.0005756936082545175, "train_min_lr": 0.0005756936082545175, "train_loss": 0.26854762146010613, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011500262156821405, "epoch": 1144, "n_parameters": 303924416} {"train_lr": 0.0005733815640463811, "train_min_lr": 0.0005733815640463811, "train_loss": 0.26853750869392967, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011706907926007913, "epoch": 1145, "n_parameters": 303924416} {"train_lr": 0.0005710731568393219, "train_min_lr": 0.0005710731568393219, "train_loss": 0.2685963691224177, "train_loss_scale": 20585.02564102564, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011955590509714033, "epoch": 1146, "n_parameters": 303924416} {"train_lr": 0.0005687683956452703, "train_min_lr": 0.0005687683956452703, "train_loss": 0.268717632891658, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012517463624811707, "epoch": 1147, "n_parameters": 303924416} {"train_lr": 0.0005664672894619201, "train_min_lr": 0.0005664672894619201, "train_loss": 0.26860335239101774, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011843660106070531, "epoch": 1148, "n_parameters": 303924416} {"train_lr": 0.0005641698472727003, "train_min_lr": 0.0005641698472727003, "train_loss": 0.2685067263515427, "train_loss_scale": 32242.871794871793, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1149, "n_parameters": 303924416} {"train_lr": 0.0005618760780467304, "train_min_lr": 0.0005618760780467304, "train_loss": 0.26847395052810985, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011816249088718532, "epoch": 1150, "n_parameters": 303924416} {"train_lr": 0.0005595859907387952, "train_min_lr": 0.0005595859907387952, "train_loss": 0.26857042984248924, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012179395718237337, "epoch": 1151, "n_parameters": 303924416} {"train_lr": 0.0005572995942893032, "train_min_lr": 0.0005572995942893032, "train_loss": 0.26845006850691366, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011799207978093853, "epoch": 1152, "n_parameters": 303924416} {"train_lr": 0.0005550168976242548, "train_min_lr": 0.0005550168976242548, "train_loss": 0.2684940804148284, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012183074997618603, "epoch": 1153, "n_parameters": 303924416} {"train_lr": 0.0005527379096552076, "train_min_lr": 0.0005527379096552076, "train_loss": 0.2684851355570106, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012018969058632277, "epoch": 1154, "n_parameters": 303924416} {"train_lr": 0.000550462639279237, "train_min_lr": 0.000550462639279237, "train_loss": 0.2683870519648712, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011684534145901218, "epoch": 1155, "n_parameters": 303924416} {"train_lr": 0.0005481910953789097, "train_min_lr": 0.0005481910953789097, "train_loss": 0.26834910221768016, "train_loss_scale": 26571.48717948718, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011916448869623052, "epoch": 1156, "n_parameters": 303924416} {"train_lr": 0.0005459232868222405, "train_min_lr": 0.0005459232868222405, "train_loss": 0.26834516366645217, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011987615522976296, "epoch": 1157, "n_parameters": 303924416} {"train_lr": 0.0005436592224626639, "train_min_lr": 0.0005436592224626639, "train_loss": 0.2683283319660964, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012031311473737542, "epoch": 1158, "n_parameters": 303924416} {"train_lr": 0.0005413989111389974, "train_min_lr": 0.0005413989111389974, "train_loss": 0.2682528412327744, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011677185192895241, "epoch": 1159, "n_parameters": 303924416} {"train_lr": 0.0005368895828813646, "train_min_lr": 0.0005368895828813646, "train_loss": 0.2681640406521276, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011586967142274937, "epoch": 1161, "n_parameters": 303924416} {"train_lr": 0.0005346405835516359, "train_min_lr": 0.0005346405835516359, "train_loss": 0.268130203237375, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01173339526837644, "epoch": 1162, "n_parameters": 303924416} {"train_lr": 0.0005323953724662217, "train_min_lr": 0.0005323953724662217, "train_loss": 0.2681402110685714, "train_loss_scale": 39699.692307692305, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012002449228953665, "epoch": 1163, "n_parameters": 303924416} {"train_lr": 0.0005301539583903355, "train_min_lr": 0.0005301539583903355, "train_loss": 0.26814391527575654, "train_loss_scale": 46946.46153846154, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1164, "n_parameters": 303924416} {"train_lr": 0.00052791635007437, "train_min_lr": 0.00052791635007437, "train_loss": 0.2681339100325623, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.011945415249404808, "epoch": 1165, "n_parameters": 303924416} {"train_lr": 0.0005256825562538566, "train_min_lr": 0.0005256825562538566, "train_loss": 0.2681045572374923, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012095246807290003, "epoch": 1166, "n_parameters": 303924416} {"train_lr": 0.0005234525856494381, "train_min_lr": 0.0005234525856494381, "train_loss": 0.26816517975814164, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012096130996584319, "epoch": 1167, "n_parameters": 303924416} {"train_lr": 0.0005212264469668297, "train_min_lr": 0.0005212264469668297, "train_loss": 0.26802073140294314, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01178639414660537, "epoch": 1168, "n_parameters": 303924416} {"train_lr": 0.0005190041488967883, "train_min_lr": 0.0005190041488967883, "train_loss": 0.2679616953813447, "train_loss_scale": 31192.615384615383, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1169, "n_parameters": 303924416} {"train_lr": 0.0005167857001150759, "train_min_lr": 0.0005167857001150759, "train_loss": 0.268063516723207, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0121347085507109, "epoch": 1170, "n_parameters": 303924416} {"train_lr": 0.0005145711092824282, "train_min_lr": 0.0005145711092824282, "train_loss": 0.26800661778543144, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012192177132536203, "epoch": 1171, "n_parameters": 303924416} {"train_lr": 0.0005123603850445193, "train_min_lr": 0.0005123603850445193, "train_loss": 0.26805333043627727, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012483178028980127, "epoch": 1172, "n_parameters": 303924416} {"train_lr": 0.0005101535360319284, "train_min_lr": 0.0005101535360319284, "train_loss": 0.2680056050766068, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012166075193538116, "epoch": 1173, "n_parameters": 303924416} {"train_lr": 0.0005079505708601042, "train_min_lr": 0.0005079505708601042, "train_loss": 0.2679106754770216, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0120547738385936, "epoch": 1174, "n_parameters": 303924416} {"train_lr": 0.000505751498129336, "train_min_lr": 0.000505751498129336, "train_loss": 0.2678533791659925, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012017927839993857, "epoch": 1175, "n_parameters": 303924416} {"train_lr": 0.0005035563264247157, "train_min_lr": 0.0005035563264247157, "train_loss": 0.2679159865380289, "train_loss_scale": 19797.333333333332, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1176, "n_parameters": 303924416} {"train_lr": 0.0005013650643161046, "train_min_lr": 0.0005013650643161046, "train_loss": 0.2679099713046199, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012283444828473223, "epoch": 1177, "n_parameters": 303924416} {"train_lr": 0.0004991777203581037, "train_min_lr": 0.0004991777203581037, "train_loss": 0.2679836663674229, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012510374280958412, "epoch": 1178, "n_parameters": 303924416} {"train_lr": 0.0004969943030900144, "train_min_lr": 0.0004969943030900144, "train_loss": 0.267927636914791, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012717952575677862, "epoch": 1179, "n_parameters": 303924416} {"train_lr": 0.0004948148210358123, "train_min_lr": 0.0004948148210358123, "train_loss": 0.2679263151650771, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01231901642257491, "epoch": 1180, "n_parameters": 303924416} {"train_lr": 0.000492639282704107, "train_min_lr": 0.000492639282704107, "train_loss": 0.2679423066871002, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012712849345870124, "epoch": 1181, "n_parameters": 303924416} {"train_lr": 0.0004904676965881128, "train_min_lr": 0.0004904676965881128, "train_loss": 0.26802981229355705, "train_loss_scale": 17486.76923076923, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012996416174782774, "epoch": 1182, "n_parameters": 303924416} {"train_lr": 0.0004883000711656163, "train_min_lr": 0.0004883000711656163, "train_loss": 0.2678708800335582, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012382310588294879, "epoch": 1183, "n_parameters": 303924416} {"train_lr": 0.000486136414898939, "train_min_lr": 0.000486136414898939, "train_loss": 0.26781217508818, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012363133868441368, "epoch": 1184, "n_parameters": 303924416} {"train_lr": 0.00048397673623490903, "train_min_lr": 0.00048397673623490903, "train_loss": 0.2678285857949119, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012563006910614861, "epoch": 1185, "n_parameters": 303924416} {"train_lr": 0.0004818210436048244, "train_min_lr": 0.0004818210436048244, "train_loss": 0.26778139122940886, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012380661314520508, "epoch": 1186, "n_parameters": 303924416} {"train_lr": 0.00047966934542442426, "train_min_lr": 0.00047966934542442426, "train_loss": 0.2677958803818537, "train_loss_scale": 18747.076923076922, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1187, "n_parameters": 303924416} {"train_lr": 0.000477521650093852, "train_min_lr": 0.000477521650093852, "train_loss": 0.26771576094250077, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012316645394700268, "epoch": 1188, "n_parameters": 303924416} {"train_lr": 0.000475377965997623, "train_min_lr": 0.000475377965997623, "train_loss": 0.26766225966350293, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012261987481719982, "epoch": 1189, "n_parameters": 303924416} {"train_lr": 0.00047323830150459544, "train_min_lr": 0.00047323830150459544, "train_loss": 0.26767587843530166, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01249472026187831, "epoch": 1190, "n_parameters": 303924416} {"train_lr": 0.000471102664967933, "train_min_lr": 0.000471102664967933, "train_loss": 0.2677036700501608, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012646953214127094, "epoch": 1191, "n_parameters": 303924416} {"train_lr": 0.00046897106472507544, "train_min_lr": 0.00046897106472507544, "train_loss": 0.2677101779830618, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012748056930155518, "epoch": 1192, "n_parameters": 303924416} {"train_lr": 0.00046684350909770566, "train_min_lr": 0.00046684350909770566, "train_loss": 0.2676349291189884, "train_loss_scale": 23683.28205128205, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012341483430459332, "epoch": 1193, "n_parameters": 303924416} {"train_lr": 0.00046472000639171394, "train_min_lr": 0.00046472000639171394, "train_loss": 0.26762260833325296, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012727629056033224, "epoch": 1194, "n_parameters": 303924416} {"train_lr": 0.00046260056489717095, "train_min_lr": 0.00046260056489717095, "train_loss": 0.2675675831866475, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012467819880932951, "epoch": 1195, "n_parameters": 303924416} {"train_lr": 0.0004604851928882911, "train_min_lr": 0.0004604851928882911, "train_loss": 0.26756217685910183, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012793764239177108, "epoch": 1196, "n_parameters": 303924416} {"train_lr": 0.0004583738986234033, "train_min_lr": 0.0004583738986234033, "train_loss": 0.2675774609252142, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012896609901546095, "epoch": 1197, "n_parameters": 303924416} {"train_lr": 0.0004562666903449135, "train_min_lr": 0.0004562666903449135, "train_loss": 0.2675117790901986, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012780468385571089, "epoch": 1198, "n_parameters": 303924416} {"train_lr": 0.0004541635762792799, "train_min_lr": 0.0004541635762792799, "train_loss": 0.2675336847284761, "train_loss_scale": 33923.282051282054, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012921588190902885, "epoch": 1199, "n_parameters": 303924416} {"train_lr": 0.00045206456463697597, "train_min_lr": 0.00045206456463697597, "train_loss": 0.267644220437759, "train_loss_scale": 36601.4358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1200, "n_parameters": 303924416} {"train_lr": 0.000449969663612458, "train_min_lr": 0.000449969663612458, "train_loss": 0.26749611013115215, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01256446776469835, "epoch": 1201, "n_parameters": 303924416} {"train_lr": 0.00044787888138413627, "train_min_lr": 0.00044787888138413627, "train_loss": 0.26742300286423415, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012610081362370879, "epoch": 1202, "n_parameters": 303924416} {"train_lr": 0.00044579222611434153, "train_min_lr": 0.00044579222611434153, "train_loss": 0.26746958909998053, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01297949510327994, "epoch": 1203, "n_parameters": 303924416} {"train_lr": 0.0004437097059492909, "train_min_lr": 0.0004437097059492909, "train_loss": 0.2673926864338752, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012702309466802921, "epoch": 1204, "n_parameters": 303924416} {"train_lr": 0.00044163132901906124, "train_min_lr": 0.00044163132901906124, "train_loss": 0.2673329032885914, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012653922166627569, "epoch": 1205, "n_parameters": 303924416} {"train_lr": 0.00043955710343755196, "train_min_lr": 0.00043955710343755196, "train_loss": 0.26733704869790625, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012734495763046047, "epoch": 1206, "n_parameters": 303924416} {"train_lr": 0.0004374870373024571, "train_min_lr": 0.0004374870373024571, "train_loss": 0.26732932624872774, "train_loss_scale": 27779.28205128205, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012971119477580756, "epoch": 1207, "n_parameters": 303924416} {"train_lr": 0.0004354211386952321, "train_min_lr": 0.0004354211386952321, "train_loss": 0.26732164153602356, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012968414137736918, "epoch": 1208, "n_parameters": 303924416} {"train_lr": 0.00043335941568106186, "train_min_lr": 0.00043335941568106186, "train_loss": 0.26725611419608003, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012784957787236916, "epoch": 1209, "n_parameters": 303924416} {"train_lr": 0.0004313018763088307, "train_min_lr": 0.0004313018763088307, "train_loss": 0.26724309151848924, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012805836427455338, "epoch": 1210, "n_parameters": 303924416} {"train_lr": 0.0004292485286110903, "train_min_lr": 0.0004292485286110903, "train_loss": 0.26721668436239737, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01293682062234252, "epoch": 1211, "n_parameters": 303924416} {"train_lr": 0.0004271993806040275, "train_min_lr": 0.0004271993806040275, "train_loss": 0.2672235186581906, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012735736299640475, "epoch": 1212, "n_parameters": 303924416} {"train_lr": 0.00042515444028743435, "train_min_lr": 0.00042515444028743435, "train_loss": 0.2670741018283969, "train_loss_scale": 42115.282051282054, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012474369353208786, "epoch": 1213, "n_parameters": 303924416} {"train_lr": 0.00042311371564467587, "train_min_lr": 0.00042311371564467587, "train_loss": 0.2670932868256783, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.012655048204275468, "epoch": 1214, "n_parameters": 303924416} {"train_lr": 0.00042107721464265945, "train_min_lr": 0.00042107721464265945, "train_loss": 0.2671015414003378, "train_loss_scale": 27306.666666666668, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1215, "n_parameters": 303924416} {"train_lr": 0.000419044945231803, "train_min_lr": 0.000419044945231803, "train_loss": 0.2669922230401052, "train_loss_scale": 9294.76923076923, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1216, "n_parameters": 303924416} {"train_lr": 0.00041701691534600573, "train_min_lr": 0.00041701691534600573, "train_loss": 0.26707238983661413, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013043638136691581, "epoch": 1217, "n_parameters": 303924416} {"train_lr": 0.0004149931329026143, "train_min_lr": 0.0004149931329026143, "train_loss": 0.2670597383358444, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013195144725987354, "epoch": 1218, "n_parameters": 303924416} {"train_lr": 0.00041297360580239503, "train_min_lr": 0.00041297360580239503, "train_loss": 0.2672160605762679, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013974099327475788, "epoch": 1219, "n_parameters": 303924416} {"train_lr": 0.00041095834192950083, "train_min_lr": 0.00041095834192950083, "train_loss": 0.26713863603818494, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013312071045406926, "epoch": 1220, "n_parameters": 303924416} {"train_lr": 0.00040894734915144056, "train_min_lr": 0.00040894734915144056, "train_loss": 0.2670957968504622, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013370127426102184, "epoch": 1221, "n_parameters": 303924416} {"train_lr": 0.0004069406353190497, "train_min_lr": 0.0004069406353190497, "train_loss": 0.2671211696952247, "train_loss_scale": 11920.410256410256, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013624834251972154, "epoch": 1222, "n_parameters": 303924416} {"train_lr": 0.000404938208266459, "train_min_lr": 0.000404938208266459, "train_loss": 0.26701526261436254, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01324440337693653, "epoch": 1223, "n_parameters": 303924416} {"train_lr": 0.0004029400758110638, "train_min_lr": 0.0004029400758110638, "train_loss": 0.26704000324034727, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01338431786877127, "epoch": 1224, "n_parameters": 303924416} {"train_lr": 0.0004009462457534931, "train_min_lr": 0.0004009462457534931, "train_loss": 0.2669685703630631, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013337853448226666, "epoch": 1225, "n_parameters": 303924416} {"train_lr": 0.0003989567258775788, "train_min_lr": 0.0003989567258775788, "train_loss": 0.26700426048587245, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013574605076334989, "epoch": 1226, "n_parameters": 303924416} {"train_lr": 0.0003969715239503275, "train_min_lr": 0.0003969715239503275, "train_loss": 0.26696243916208356, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013439457944164483, "epoch": 1227, "n_parameters": 303924416} {"train_lr": 0.00039499064772188796, "train_min_lr": 0.00039499064772188796, "train_loss": 0.2670092740788674, "train_loss_scale": 17119.17948717949, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013740522028782811, "epoch": 1228, "n_parameters": 303924416} {"train_lr": 0.0003930141049255215, "train_min_lr": 0.0003930141049255215, "train_loss": 0.2669294896857956, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013311765523842322, "epoch": 1229, "n_parameters": 303924416} {"train_lr": 0.0003910419032775726, "train_min_lr": 0.0003910419032775726, "train_loss": 0.2668797453829589, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013053663633166788, "epoch": 1230, "n_parameters": 303924416} {"train_lr": 0.0003890740504774367, "train_min_lr": 0.0003890740504774367, "train_loss": 0.2668099084880967, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01318019521362984, "epoch": 1231, "n_parameters": 303924416} {"train_lr": 0.00038711055420753357, "train_min_lr": 0.00038711055420753357, "train_loss": 0.2668100806006875, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013467899977396697, "epoch": 1232, "n_parameters": 303924416} {"train_lr": 0.00038515142213327275, "train_min_lr": 0.00038515142213327275, "train_loss": 0.26681465873769367, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01368406028427088, "epoch": 1233, "n_parameters": 303924416} {"train_lr": 0.0003831966619030283, "train_min_lr": 0.0003831966619030283, "train_loss": 0.26691118139439285, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01403196767056122, "epoch": 1234, "n_parameters": 303924416} {"train_lr": 0.0003812462811481052, "train_min_lr": 0.0003812462811481052, "train_loss": 0.266959957465028, "train_loss_scale": 19167.17948717949, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1235, "n_parameters": 303924416} {"train_lr": 0.00037930028748271266, "train_min_lr": 0.00037930028748271266, "train_loss": 0.26685519982726336, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013639010465106903, "epoch": 1236, "n_parameters": 303924416} {"train_lr": 0.0003773586885039312, "train_min_lr": 0.0003773586885039312, "train_loss": 0.2668096143453836, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013683802809399098, "epoch": 1237, "n_parameters": 303924416} {"train_lr": 0.00037542149179168607, "train_min_lr": 0.00037542149179168607, "train_loss": 0.26680476638751155, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013721180479758633, "epoch": 1238, "n_parameters": 303924416} {"train_lr": 0.00037348870490871565, "train_min_lr": 0.00037348870490871565, "train_loss": 0.2668069401749959, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013919975751867661, "epoch": 1239, "n_parameters": 303924416} {"train_lr": 0.0003715603354005422, "train_min_lr": 0.0003715603354005422, "train_loss": 0.2667534480689285, "train_loss_scale": 13390.76923076923, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1240, "n_parameters": 303924416} {"train_lr": 0.00036963639079544305, "train_min_lr": 0.00036963639079544305, "train_loss": 0.26675668457606566, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014042075362223655, "epoch": 1241, "n_parameters": 303924416} {"train_lr": 0.00036771687860442183, "train_min_lr": 0.00036771687860442183, "train_loss": 0.2667566443948696, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014145569323609846, "epoch": 1242, "n_parameters": 303924416} {"train_lr": 0.000365801806321176, "train_min_lr": 0.000365801806321176, "train_loss": 0.26673276328111595, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014009379086275704, "epoch": 1243, "n_parameters": 303924416} {"train_lr": 0.00036389118142207233, "train_min_lr": 0.00036389118142207233, "train_loss": 0.2666061065153768, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013514212472961308, "epoch": 1244, "n_parameters": 303924416} {"train_lr": 0.00036198501136611275, "train_min_lr": 0.00036198501136611275, "train_loss": 0.2665857442278558, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013584372871077787, "epoch": 1245, "n_parameters": 303924416} {"train_lr": 0.0003600833035949099, "train_min_lr": 0.0003600833035949099, "train_loss": 0.26653045657365465, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01355205061666381, "epoch": 1246, "n_parameters": 303924416} {"train_lr": 0.0003581860655326535, "train_min_lr": 0.0003581860655326535, "train_loss": 0.2665674334887463, "train_loss_scale": 16016.410256410256, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013835572697317753, "epoch": 1247, "n_parameters": 303924416} {"train_lr": 0.0003562933045860865, "train_min_lr": 0.0003562933045860865, "train_loss": 0.26653541994388574, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013765582734217437, "epoch": 1248, "n_parameters": 303924416} {"train_lr": 0.0003544050281444713, "train_min_lr": 0.0003544050281444713, "train_loss": 0.2664988348021721, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013666618110922476, "epoch": 1249, "n_parameters": 303924416} {"train_lr": 0.00035252124357956267, "train_min_lr": 0.00035252124357956267, "train_loss": 0.266451388448238, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013663787902213441, "epoch": 1250, "n_parameters": 303924416} {"train_lr": 0.0003506419582455813, "train_min_lr": 0.0003506419582455813, "train_loss": 0.2664545719446137, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01407838384143244, "epoch": 1251, "n_parameters": 303924416} {"train_lr": 0.00034876717947918156, "train_min_lr": 0.00034876717947918156, "train_loss": 0.26637573844681567, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013764966243448166, "epoch": 1252, "n_parameters": 303924416} {"train_lr": 0.00034689691459942405, "train_min_lr": 0.00034689691459942405, "train_loss": 0.26644540821405077, "train_loss_scale": 25311.17948717949, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01412123706466399, "epoch": 1253, "n_parameters": 303924416} {"train_lr": 0.0003450311709077491, "train_min_lr": 0.0003450311709077491, "train_loss": 0.2664032169820693, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014227565428695809, "epoch": 1254, "n_parameters": 303924416} {"train_lr": 0.00034316995568794414, "train_min_lr": 0.00034316995568794414, "train_loss": 0.2663574298001969, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013817492409800299, "epoch": 1255, "n_parameters": 303924416} {"train_lr": 0.00034131327620612003, "train_min_lr": 0.00034131327620612003, "train_loss": 0.2662993853386396, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013770174673901727, "epoch": 1256, "n_parameters": 303924416} {"train_lr": 0.00033946113971067857, "train_min_lr": 0.00033946113971067857, "train_loss": 0.26631291983470035, "train_loss_scale": 16594.05128205128, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1257, "n_parameters": 303924416} {"train_lr": 0.0003376135534322866, "train_min_lr": 0.0003376135534322866, "train_loss": 0.26634177683780974, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014083739253692329, "epoch": 1258, "n_parameters": 303924416} {"train_lr": 0.0003357705245838467, "train_min_lr": 0.0003357705245838467, "train_loss": 0.26624657592783946, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01382512989668892, "epoch": 1259, "n_parameters": 303924416} {"train_lr": 0.00033393206036047053, "train_min_lr": 0.00033393206036047053, "train_loss": 0.26629176085444695, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014321199677980099, "epoch": 1260, "n_parameters": 303924416} {"train_lr": 0.0003320981679394479, "train_min_lr": 0.0003320981679394479, "train_loss": 0.26621207364130384, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013978318762010297, "epoch": 1261, "n_parameters": 303924416} {"train_lr": 0.0003302688544802215, "train_min_lr": 0.0003302688544802215, "train_loss": 0.2662337632485641, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014198515438832916, "epoch": 1262, "n_parameters": 303924416} {"train_lr": 0.0003284441271243585, "train_min_lr": 0.0003284441271243585, "train_loss": 0.2661958662912441, "train_loss_scale": 19876.102564102563, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1263, "n_parameters": 303924416} {"train_lr": 0.0003266239929955209, "train_min_lr": 0.0003266239929955209, "train_loss": 0.26624096384666, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014655588502183748, "epoch": 1264, "n_parameters": 303924416} {"train_lr": 0.00032480845919943997, "train_min_lr": 0.00032480845919943997, "train_loss": 0.26616015606141913, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014441264324630491, "epoch": 1265, "n_parameters": 303924416} {"train_lr": 0.0003229975328238862, "train_min_lr": 0.0003229975328238862, "train_loss": 0.2662079762936068, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015005363814102916, "epoch": 1266, "n_parameters": 303924416} {"train_lr": 0.00032119122093864414, "train_min_lr": 0.00032119122093864414, "train_loss": 0.2662745144797298, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014945157582704455, "epoch": 1267, "n_parameters": 303924416} {"train_lr": 0.00031938953059548313, "train_min_lr": 0.00031938953059548313, "train_loss": 0.2661812339688484, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01433087576729938, "epoch": 1268, "n_parameters": 303924416} {"train_lr": 0.0003175924688281298, "train_min_lr": 0.0003175924688281298, "train_loss": 0.266139129946868, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014281905769633178, "epoch": 1269, "n_parameters": 303924416} {"train_lr": 0.00031580004265224147, "train_min_lr": 0.00031580004265224147, "train_loss": 0.2660477716738406, "train_loss_scale": 14519.794871794871, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01432750525418669, "epoch": 1270, "n_parameters": 303924416} {"train_lr": 0.0003140122590653787, "train_min_lr": 0.0003140122590653787, "train_loss": 0.2660600105390096, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014308963645583926, "epoch": 1271, "n_parameters": 303924416} {"train_lr": 0.0003122291250469768, "train_min_lr": 0.0003122291250469768, "train_loss": 0.2659783265284764, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014062177786866251, "epoch": 1272, "n_parameters": 303924416} {"train_lr": 0.00031045064755831896, "train_min_lr": 0.00031045064755831896, "train_loss": 0.26592947121184224, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.013963714173326317, "epoch": 1273, "n_parameters": 303924416} {"train_lr": 0.0003086768335425105, "train_min_lr": 0.0003086768335425105, "train_loss": 0.26591501847649807, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014345267092068799, "epoch": 1274, "n_parameters": 303924416} {"train_lr": 0.00030690768992445067, "train_min_lr": 0.00030690768992445067, "train_loss": 0.26592398795167893, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014596998846779266, "epoch": 1275, "n_parameters": 303924416} {"train_lr": 0.00030514322361080464, "train_min_lr": 0.00030514322361080464, "train_loss": 0.2658989401295399, "train_loss_scale": 22317.94871794872, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014575819935625752, "epoch": 1276, "n_parameters": 303924416} {"train_lr": 0.0003033834414899792, "train_min_lr": 0.0003033834414899792, "train_loss": 0.26594833564311743, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01477764422694842, "epoch": 1277, "n_parameters": 303924416} {"train_lr": 0.0003016283504320918, "train_min_lr": 0.0003016283504320918, "train_loss": 0.2658854702529378, "train_loss_scale": 32768.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01428134048071045, "epoch": 1278, "n_parameters": 303924416} {"train_lr": 0.00029987795728894943, "train_min_lr": 0.00029987795728894943, "train_loss": 0.26579732586068505, "train_loss_scale": 21162.666666666668, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1279, "n_parameters": 303924416} {"train_lr": 0.0002981322688940158, "train_min_lr": 0.0002981322688940158, "train_loss": 0.26584204927815175, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014687563038956469, "epoch": 1280, "n_parameters": 303924416} {"train_lr": 0.0002963912920623888, "train_min_lr": 0.0002963912920623888, "train_loss": 0.265754620416257, "train_loss_scale": 8585.846153846154, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1281, "n_parameters": 303924416} {"train_lr": 0.0002946550335907722, "train_min_lr": 0.0002946550335907722, "train_loss": 0.26574997646578896, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014134255497573087, "epoch": 1282, "n_parameters": 303924416} {"train_lr": 0.0002929235002574496, "train_min_lr": 0.0002929235002574496, "train_loss": 0.2656675152755223, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014228759805122629, "epoch": 1283, "n_parameters": 303924416} {"train_lr": 0.0002911966988222576, "train_min_lr": 0.0002911966988222576, "train_loss": 0.26560049143452674, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014165161401391603, "epoch": 1284, "n_parameters": 303924416} {"train_lr": 0.00028947463602656026, "train_min_lr": 0.00028947463602656026, "train_loss": 0.2656368436769415, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014847976815265914, "epoch": 1285, "n_parameters": 303924416} {"train_lr": 0.0002877573185932216, "train_min_lr": 0.0002877573185932216, "train_loss": 0.26560958470993984, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014503335639929924, "epoch": 1286, "n_parameters": 303924416} {"train_lr": 0.0002860447532265804, "train_min_lr": 0.0002860447532265804, "train_loss": 0.2655447654378338, "train_loss_scale": 12629.333333333334, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014498409809162602, "epoch": 1287, "n_parameters": 303924416} {"train_lr": 0.00028433694661242245, "train_min_lr": 0.00028433694661242245, "train_loss": 0.2655389699338673, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014530218173510944, "epoch": 1288, "n_parameters": 303924416} {"train_lr": 0.0002826339054179573, "train_min_lr": 0.0002826339054179573, "train_loss": 0.2655349785360532, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014618111771340363, "epoch": 1289, "n_parameters": 303924416} {"train_lr": 0.00028093563629178934, "train_min_lr": 0.00028093563629178934, "train_loss": 0.26552514630990726, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014766207329021433, "epoch": 1290, "n_parameters": 303924416} {"train_lr": 0.00027924214586389314, "train_min_lr": 0.00027924214586389314, "train_loss": 0.2654764923111846, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014794968937237103, "epoch": 1291, "n_parameters": 303924416} {"train_lr": 0.00027755344074558737, "train_min_lr": 0.00027755344074558737, "train_loss": 0.26550913570472634, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015033695196064237, "epoch": 1292, "n_parameters": 303924416} {"train_lr": 0.00027586952752950975, "train_min_lr": 0.00027586952752950975, "train_loss": 0.2654807069309008, "train_loss_scale": 18537.02564102564, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015261292600860963, "epoch": 1293, "n_parameters": 303924416} {"train_lr": 0.0002741904127895894, "train_min_lr": 0.0002741904127895894, "train_loss": 0.265435883608193, "train_loss_scale": 18484.51282051282, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1294, "n_parameters": 303924416} {"train_lr": 0.00027251610308102385, "train_min_lr": 0.00027251610308102385, "train_loss": 0.26538347706306153, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014687996653195184, "epoch": 1295, "n_parameters": 303924416} {"train_lr": 0.00027084660494025017, "train_min_lr": 0.00027084660494025017, "train_loss": 0.2653296848950096, "train_loss_scale": 10975.179487179486, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1296, "n_parameters": 303924416} {"train_lr": 0.00026918192488492327, "train_min_lr": 0.00026918192488492327, "train_loss": 0.26534100745816547, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014674245075394329, "epoch": 1297, "n_parameters": 303924416} {"train_lr": 0.0002675220694138866, "train_min_lr": 0.0002675220694138866, "train_loss": 0.26529249735176563, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014996527700135723, "epoch": 1298, "n_parameters": 303924416} {"train_lr": 0.0002658670450071499, "train_min_lr": 0.0002658670450071499, "train_loss": 0.2653144556873788, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014783263857213732, "epoch": 1299, "n_parameters": 303924416} {"train_lr": 0.00026421685812586204, "train_min_lr": 0.00026421685812586204, "train_loss": 0.26522325386161894, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014730256498576356, "epoch": 1300, "n_parameters": 303924416} {"train_lr": 0.00026257151521228675, "train_min_lr": 0.00026257151521228675, "train_loss": 0.2652481478185226, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015068401215383066, "epoch": 1301, "n_parameters": 303924416} {"train_lr": 0.0002609310226897767, "train_min_lr": 0.0002609310226897767, "train_loss": 0.26524942696918374, "train_loss_scale": 10240.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014859503627611468, "epoch": 1302, "n_parameters": 303924416} {"train_lr": 0.0002592953869627493, "train_min_lr": 0.0002592953869627493, "train_loss": 0.2652094248944941, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.014982942617140137, "epoch": 1303, "n_parameters": 303924416} {"train_lr": 0.0002576646144166603, "train_min_lr": 0.0002576646144166603, "train_loss": 0.26521089929156005, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015292702096466644, "epoch": 1304, "n_parameters": 303924416} {"train_lr": 0.0002560387114179813, "train_min_lr": 0.0002560387114179813, "train_loss": 0.2652005285520154, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01562736582202025, "epoch": 1305, "n_parameters": 303924416} {"train_lr": 0.0002544176843141719, "train_min_lr": 0.0002544176843141719, "train_loss": 0.26515111726266927, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015239839257003788, "epoch": 1306, "n_parameters": 303924416} {"train_lr": 0.0002528015394336573, "train_min_lr": 0.0002528015394336573, "train_loss": 0.2651753141186558, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01548248036310841, "epoch": 1307, "n_parameters": 303924416} {"train_lr": 0.00025119028308580186, "train_min_lr": 0.00025119028308580186, "train_loss": 0.2651309054762794, "train_loss_scale": 11395.28205128205, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1308, "n_parameters": 303924416} {"train_lr": 0.00024958392156088685, "train_min_lr": 0.00024958392156088685, "train_loss": 0.2651162198296963, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015127225292440599, "epoch": 1309, "n_parameters": 303924416} {"train_lr": 0.0002479824611300827, "train_min_lr": 0.0002479824611300827, "train_loss": 0.2650503904636328, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015307341669447338, "epoch": 1310, "n_parameters": 303924416} {"train_lr": 0.00024638590804542716, "train_min_lr": 0.00024638590804542716, "train_loss": 0.26507200121891517, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015529137636081148, "epoch": 1311, "n_parameters": 303924416} {"train_lr": 0.0002447942685397993, "train_min_lr": 0.0002447942685397993, "train_loss": 0.26498922716097856, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01526537706824736, "epoch": 1312, "n_parameters": 303924416} {"train_lr": 0.00024320754882689558, "train_min_lr": 0.00024320754882689558, "train_loss": 0.26501705496010775, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01568734727334231, "epoch": 1313, "n_parameters": 303924416} {"train_lr": 0.00024162575510120723, "train_min_lr": 0.00024162575510120723, "train_loss": 0.26491787102228653, "train_loss_scale": 9819.897435897436, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015022592266830496, "epoch": 1314, "n_parameters": 303924416} {"train_lr": 0.00024004889353799303, "train_min_lr": 0.00024004889353799303, "train_loss": 0.2649552309825921, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01567861445260067, "epoch": 1315, "n_parameters": 303924416} {"train_lr": 0.00023847697029325722, "train_min_lr": 0.00023847697029325722, "train_loss": 0.26494097342798245, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015431201574989619, "epoch": 1316, "n_parameters": 303924416} {"train_lr": 0.00023690999150372558, "train_min_lr": 0.00023690999150372558, "train_loss": 0.2649475381327554, "train_loss_scale": 13049.435897435897, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1317, "n_parameters": 303924416} {"train_lr": 0.00023534796328682044, "train_min_lr": 0.00023534796328682044, "train_loss": 0.2649927870519698, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016489859948006388, "epoch": 1318, "n_parameters": 303924416} {"train_lr": 0.0002337908917406379, "train_min_lr": 0.0002337908917406379, "train_loss": 0.2649142663382615, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01571888995404618, "epoch": 1319, "n_parameters": 303924416} {"train_lr": 0.0002322387829439219, "train_min_lr": 0.0002322387829439219, "train_loss": 0.2649291990808426, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016066534880500954, "epoch": 1320, "n_parameters": 303924416} {"train_lr": 0.00023069164295604397, "train_min_lr": 0.00023069164295604397, "train_loss": 0.26491812935087067, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016016871916154064, "epoch": 1321, "n_parameters": 303924416} {"train_lr": 0.00022914947781697628, "train_min_lr": 0.00022914947781697628, "train_loss": 0.26481900478784853, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015434264730757628, "epoch": 1322, "n_parameters": 303924416} {"train_lr": 0.0002276122935472699, "train_min_lr": 0.0002276122935472699, "train_loss": 0.26477813590258265, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015445337696478535, "epoch": 1323, "n_parameters": 303924416} {"train_lr": 0.0002260800961480308, "train_min_lr": 0.0002260800961480308, "train_loss": 0.26476791416270995, "train_loss_scale": 16357.74358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015885630168784887, "epoch": 1324, "n_parameters": 303924416} {"train_lr": 0.00022455289160089586, "train_min_lr": 0.00022455289160089586, "train_loss": 0.264758322137193, "train_loss_scale": 15149.948717948719, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1325, "n_parameters": 303924416} {"train_lr": 0.0002230306858680111, "train_min_lr": 0.0002230306858680111, "train_loss": 0.2648084313513186, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016765483652050488, "epoch": 1326, "n_parameters": 303924416} {"train_lr": 0.0002215134848920061, "train_min_lr": 0.0002215134848920061, "train_loss": 0.26477640363960886, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016610964114825506, "epoch": 1327, "n_parameters": 303924416} {"train_lr": 0.00022000129459597318, "train_min_lr": 0.00022000129459597318, "train_loss": 0.2647217748662791, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016019614150699895, "epoch": 1328, "n_parameters": 303924416} {"train_lr": 0.00021849412088344262, "train_min_lr": 0.00021849412088344262, "train_loss": 0.2646964247165343, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016267913435466397, "epoch": 1329, "n_parameters": 303924416} {"train_lr": 0.00021699196963836007, "train_min_lr": 0.00021699196963836007, "train_loss": 0.26459710047735524, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01599480416207837, "epoch": 1330, "n_parameters": 303924416} {"train_lr": 0.0002154948467250644, "train_min_lr": 0.0002154948467250644, "train_loss": 0.264611833060208, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015897621570799787, "epoch": 1331, "n_parameters": 303924416} {"train_lr": 0.00021400275798826295, "train_min_lr": 0.00021400275798826295, "train_loss": 0.26459108307683027, "train_loss_scale": 14257.23076923077, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016041693671678122, "epoch": 1332, "n_parameters": 303924416} {"train_lr": 0.00021251570925301055, "train_min_lr": 0.00021251570925301055, "train_loss": 0.26457273934143954, "train_loss_scale": 15097.435897435897, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1333, "n_parameters": 303924416} {"train_lr": 0.00021103370632468622, "train_min_lr": 0.00021103370632468622, "train_loss": 0.26452579349577904, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.015947678063709576, "epoch": 1334, "n_parameters": 303924416} {"train_lr": 0.00020955675498896944, "train_min_lr": 0.00020955675498896944, "train_loss": 0.26449532058531755, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016210684910989724, "epoch": 1335, "n_parameters": 303924416} {"train_lr": 0.00020808486101181987, "train_min_lr": 0.00020808486101181987, "train_loss": 0.26448467150270843, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016160307834163692, "epoch": 1336, "n_parameters": 303924416} {"train_lr": 0.00020661803013945218, "train_min_lr": 0.00020661803013945218, "train_loss": 0.2645166022374701, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016757128222917136, "epoch": 1337, "n_parameters": 303924416} {"train_lr": 0.00020515626809831545, "train_min_lr": 0.00020515626809831545, "train_loss": 0.2644639471038364, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016287751448078033, "epoch": 1338, "n_parameters": 303924416} {"train_lr": 0.00020369958059507004, "train_min_lr": 0.00020369958059507004, "train_loss": 0.2643953686615882, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016601950348092195, "epoch": 1339, "n_parameters": 303924416} {"train_lr": 0.00020224797331656628, "train_min_lr": 0.00020224797331656628, "train_loss": 0.2643855078510033, "train_loss_scale": 14309.74358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01614795489391933, "epoch": 1340, "n_parameters": 303924416} {"train_lr": 0.00020080145192982, "train_min_lr": 0.00020080145192982, "train_loss": 0.26441190973855555, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0162128088876414, "epoch": 1341, "n_parameters": 303924416} {"train_lr": 0.00019936002208199326, "train_min_lr": 0.00019936002208199326, "train_loss": 0.2643525077877805, "train_loss_scale": 13443.28205128205, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1342, "n_parameters": 303924416} {"train_lr": 0.00019792368940037044, "train_min_lr": 0.00019792368940037044, "train_loss": 0.2643043082356692, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016175724789858438, "epoch": 1343, "n_parameters": 303924416} {"train_lr": 0.00019649245949233696, "train_min_lr": 0.00019649245949233696, "train_loss": 0.26427492248013806, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01611291948789492, "epoch": 1344, "n_parameters": 303924416} {"train_lr": 0.0001950663379453567, "train_min_lr": 0.0001950663379453567, "train_loss": 0.26425822083062184, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01641242086397818, "epoch": 1345, "n_parameters": 303924416} {"train_lr": 0.00019364533032695125, "train_min_lr": 0.00019364533032695125, "train_loss": 0.2642712627794259, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0169026896966478, "epoch": 1346, "n_parameters": 303924416} {"train_lr": 0.00019222944218467777, "train_min_lr": 0.00019222944218467777, "train_loss": 0.26422065899719316, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016681850189343095, "epoch": 1347, "n_parameters": 303924416} {"train_lr": 0.00019081867904610655, "train_min_lr": 0.00019081867904610655, "train_loss": 0.2641798547940711, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016256595919959437, "epoch": 1348, "n_parameters": 303924416} {"train_lr": 0.0001894130464188002, "train_min_lr": 0.0001894130464188002, "train_loss": 0.26419449137905854, "train_loss_scale": 15963.897435897436, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01667728877136818, "epoch": 1349, "n_parameters": 303924416} {"train_lr": 0.00018801254979029276, "train_min_lr": 0.00018801254979029276, "train_loss": 0.26419500565717524, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01698119222270086, "epoch": 1350, "n_parameters": 303924416} {"train_lr": 0.00018661719462806676, "train_min_lr": 0.00018661719462806676, "train_loss": 0.2641860322059634, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016955567016385686, "epoch": 1351, "n_parameters": 303924416} {"train_lr": 0.000185226986379533, "train_min_lr": 0.000185226986379533, "train_loss": 0.2641239768604581, "train_loss_scale": 9399.794871794871, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1352, "n_parameters": 303924416} {"train_lr": 0.00018384193047200835, "train_min_lr": 0.00018384193047200835, "train_loss": 0.26406306847452354, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016437104189744554, "epoch": 1353, "n_parameters": 303924416} {"train_lr": 0.00018246203231269632, "train_min_lr": 0.00018246203231269632, "train_loss": 0.2640168191984487, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016529071843251586, "epoch": 1354, "n_parameters": 303924416} {"train_lr": 0.00018108729728866365, "train_min_lr": 0.00018108729728866365, "train_loss": 0.26404501045218265, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016938888235017657, "epoch": 1355, "n_parameters": 303924416} {"train_lr": 0.00017971773076682075, "train_min_lr": 0.00017971773076682075, "train_loss": 0.26403160750543553, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01696775675130387, "epoch": 1356, "n_parameters": 303924416} {"train_lr": 0.0001783533380939009, "train_min_lr": 0.0001783533380939009, "train_loss": 0.2640237318357835, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017744831457280386, "epoch": 1357, "n_parameters": 303924416} {"train_lr": 0.00017699412459643834, "train_min_lr": 0.00017699412459643834, "train_loss": 0.2640132346244242, "train_loss_scale": 11815.384615384615, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017014815188490618, "epoch": 1358, "n_parameters": 303924416} {"train_lr": 0.00017564009558074763, "train_min_lr": 0.00017564009558074763, "train_loss": 0.26394194569020796, "train_loss_scale": 10765.128205128205, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1359, "n_parameters": 303924416} {"train_lr": 0.0001742912563329047, "train_min_lr": 0.0001742912563329047, "train_loss": 0.26396440094014484, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017411615633859467, "epoch": 1360, "n_parameters": 303924416} {"train_lr": 0.00017294761211872326, "train_min_lr": 0.00017294761211872326, "train_loss": 0.2639312081110592, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017304199667146, "epoch": 1361, "n_parameters": 303924416} {"train_lr": 0.00017160916818373685, "train_min_lr": 0.00017160916818373685, "train_loss": 0.26389301624387884, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01705692032686411, "epoch": 1362, "n_parameters": 303924416} {"train_lr": 0.00017027592975317707, "train_min_lr": 0.00017027592975317707, "train_loss": 0.2638645693809033, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017119768266685497, "epoch": 1363, "n_parameters": 303924416} {"train_lr": 0.0001689479020319532, "train_min_lr": 0.0001689479020319532, "train_loss": 0.2638094620152305, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.016885447280051615, "epoch": 1364, "n_parameters": 303924416} {"train_lr": 0.0001676250902046324, "train_min_lr": 0.0001676250902046324, "train_loss": 0.26381133199156, "train_loss_scale": 10082.461538461539, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1365, "n_parameters": 303924416} {"train_lr": 0.00016630749943541908, "train_min_lr": 0.00016630749943541908, "train_loss": 0.26375709608412135, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01752620679923357, "epoch": 1366, "n_parameters": 303924416} {"train_lr": 0.00016499513486813497, "train_min_lr": 0.00016499513486813497, "train_loss": 0.26382020835239345, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017951521028477985, "epoch": 1367, "n_parameters": 303924416} {"train_lr": 0.00016368800162619838, "train_min_lr": 0.00016368800162619838, "train_loss": 0.2638229195309134, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018204541435130894, "epoch": 1368, "n_parameters": 303924416} {"train_lr": 0.0001623861048126056, "train_min_lr": 0.0001623861048126056, "train_loss": 0.2637870679907978, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017675902891474273, "epoch": 1369, "n_parameters": 303924416} {"train_lr": 0.0001610894495099096, "train_min_lr": 0.0001610894495099096, "train_loss": 0.2637132323609713, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01760728831271617, "epoch": 1370, "n_parameters": 303924416} {"train_lr": 0.00015979804078020056, "train_min_lr": 0.00015979804078020056, "train_loss": 0.26372791695981646, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01791423710827262, "epoch": 1371, "n_parameters": 303924416} {"train_lr": 0.00015851188366508654, "train_min_lr": 0.00015851188366508654, "train_loss": 0.2636461287486152, "train_loss_scale": 13390.76923076923, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017659975932194635, "epoch": 1372, "n_parameters": 303924416} {"train_lr": 0.00015723098318567354, "train_min_lr": 0.00015723098318567354, "train_loss": 0.26369537078202343, "train_loss_scale": 10975.179487179486, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1373, "n_parameters": 303924416} {"train_lr": 0.00015595534434254574, "train_min_lr": 0.00015595534434254574, "train_loss": 0.2635796549580752, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01703838755090076, "epoch": 1374, "n_parameters": 303924416} {"train_lr": 0.000154684972115746, "train_min_lr": 0.000154684972115746, "train_loss": 0.26359368718635195, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01733616229970581, "epoch": 1375, "n_parameters": 303924416} {"train_lr": 0.00015341987146475668, "train_min_lr": 0.00015341987146475668, "train_loss": 0.26358521844332033, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018127687281379715, "epoch": 1376, "n_parameters": 303924416} {"train_lr": 0.00015216004732847995, "train_min_lr": 0.00015216004732847995, "train_loss": 0.26355253767150527, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018038880402365558, "epoch": 1377, "n_parameters": 303924416} {"train_lr": 0.00015090550462521928, "train_min_lr": 0.00015090550462521928, "train_loss": 0.2635178641678813, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01769763735146859, "epoch": 1378, "n_parameters": 303924416} {"train_lr": 0.00014965624825265868, "train_min_lr": 0.00014965624825265868, "train_loss": 0.26347614077325815, "train_loss_scale": 10240.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01770084823230998, "epoch": 1379, "n_parameters": 303924416} {"train_lr": 0.00014841228308784527, "train_min_lr": 0.00014841228308784527, "train_loss": 0.2634729076899254, "train_loss_scale": 12603.076923076924, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1380, "n_parameters": 303924416} {"train_lr": 0.00014717361398716892, "train_min_lr": 0.00014717361398716892, "train_loss": 0.2634370495947317, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017930647453818567, "epoch": 1381, "n_parameters": 303924416} {"train_lr": 0.00014594024578634438, "train_min_lr": 0.00014594024578634438, "train_loss": 0.2635761945783041, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020567801348172512, "epoch": 1382, "n_parameters": 303924416} {"train_lr": 0.0001447121833003921, "train_min_lr": 0.0001447121833003921, "train_loss": 0.26346945175184655, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018392407801002264, "epoch": 1383, "n_parameters": 303924416} {"train_lr": 0.00014348943132361824, "train_min_lr": 0.00014348943132361824, "train_loss": 0.263469879118463, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018695138096331786, "epoch": 1384, "n_parameters": 303924416} {"train_lr": 0.00014227199462959794, "train_min_lr": 0.00014227199462959794, "train_loss": 0.263359335054142, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017722277997586973, "epoch": 1385, "n_parameters": 303924416} {"train_lr": 0.00014105987797115546, "train_min_lr": 0.00014105987797115546, "train_loss": 0.26330021443442464, "train_loss_scale": 8612.102564102564, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017386615667372752, "epoch": 1386, "n_parameters": 303924416} {"train_lr": 0.00013985308608034525, "train_min_lr": 0.00013985308608034525, "train_loss": 0.2632627169667289, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017539199590921786, "epoch": 1387, "n_parameters": 303924416} {"train_lr": 0.00013865162366843504, "train_min_lr": 0.00013865162366843504, "train_loss": 0.26325862442788023, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01774815176255428, "epoch": 1388, "n_parameters": 303924416} {"train_lr": 0.0001374554954258855, "train_min_lr": 0.0001374554954258855, "train_loss": 0.26322625489392054, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017851163376457035, "epoch": 1389, "n_parameters": 303924416} {"train_lr": 0.00013626470602233357, "train_min_lr": 0.00013626470602233357, "train_loss": 0.26319476917911416, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017832635919778392, "epoch": 1390, "n_parameters": 303924416} {"train_lr": 0.00013507926010657354, "train_min_lr": 0.00013507926010657354, "train_loss": 0.26320038213191604, "train_loss_scale": 14834.871794871795, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1391, "n_parameters": 303924416} {"train_lr": 0.00013389916230653877, "train_min_lr": 0.00013389916230653877, "train_loss": 0.2631380078711141, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017693171489577837, "epoch": 1392, "n_parameters": 303924416} {"train_lr": 0.00013272441722928392, "train_min_lr": 0.00013272441722928392, "train_loss": 0.2631069697612801, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018179777911147818, "epoch": 1393, "n_parameters": 303924416} {"train_lr": 0.00013155502946096624, "train_min_lr": 0.00013155502946096624, "train_loss": 0.26314890476313824, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01842767478396686, "epoch": 1394, "n_parameters": 303924416} {"train_lr": 0.0001303910035668295, "train_min_lr": 0.0001303910035668295, "train_loss": 0.26307285830784494, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01821720574457103, "epoch": 1395, "n_parameters": 303924416} {"train_lr": 0.00012923234409118378, "train_min_lr": 0.00012923234409118378, "train_loss": 0.26306613956769115, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018296907318588823, "epoch": 1396, "n_parameters": 303924416} {"train_lr": 0.00012807905555738917, "train_min_lr": 0.00012807905555738917, "train_loss": 0.2629907488739357, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017550996442038853, "epoch": 1397, "n_parameters": 303924416} {"train_lr": 0.0001269311424678383, "train_min_lr": 0.0001269311424678383, "train_loss": 0.26296938491018057, "train_loss_scale": 10765.128205128205, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1398, "n_parameters": 303924416} {"train_lr": 0.00012578860930393768, "train_min_lr": 0.00012578860930393768, "train_loss": 0.2629573241276189, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.017939321938543938, "epoch": 1399, "n_parameters": 303924416} {"train_lr": 0.0001235197005736816, "train_min_lr": 0.0001235197005736816, "train_loss": 0.26288690126966685, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018215085827530578, "epoch": 1401, "n_parameters": 303924416} {"train_lr": 0.00012239333386505511, "train_min_lr": 0.00012239333386505511, "train_loss": 0.26288487840121466, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018075956091380272, "epoch": 1402, "n_parameters": 303924416} {"train_lr": 0.00012127236479750209, "train_min_lr": 0.00012127236479750209, "train_loss": 0.26287675957876044, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018025679722762644, "epoch": 1403, "n_parameters": 303924416} {"train_lr": 0.00012015679774724091, "train_min_lr": 0.00012015679774724091, "train_loss": 0.2628428707920158, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01827209771801837, "epoch": 1404, "n_parameters": 303924416} {"train_lr": 0.00011904663706940038, "train_min_lr": 0.00011904663706940038, "train_loss": 0.26284552595387095, "train_loss_scale": 8638.358974358975, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018769517063330382, "epoch": 1405, "n_parameters": 303924416} {"train_lr": 0.00011794188709800375, "train_min_lr": 0.00011794188709800375, "train_loss": 0.26287056185388696, "train_loss_scale": 16384.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.019306848948009502, "epoch": 1406, "n_parameters": 303924416} {"train_lr": 0.0001168425521459504, "train_min_lr": 0.0001168425521459504, "train_loss": 0.2628199534628015, "train_loss_scale": 13417.02564102564, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1407, "n_parameters": 303924416} {"train_lr": 0.000115748636505, "train_min_lr": 0.000115748636505, "train_loss": 0.2627425840040908, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018321283346710678, "epoch": 1408, "n_parameters": 303924416} {"train_lr": 0.00011466014444575561, "train_min_lr": 0.00011466014444575561, "train_loss": 0.2627616510935462, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.019017651892052248, "epoch": 1409, "n_parameters": 303924416} {"train_lr": 0.00011357708021764657, "train_min_lr": 0.00011357708021764657, "train_loss": 0.2627659019602176, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01902641384050441, "epoch": 1410, "n_parameters": 303924416} {"train_lr": 0.00011249944804891208, "train_min_lr": 0.00011249944804891208, "train_loss": 0.26272157625629544, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.019244483409401696, "epoch": 1411, "n_parameters": 303924416} {"train_lr": 0.00011142725214658523, "train_min_lr": 0.00011142725214658523, "train_loss": 0.26262793249677485, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01853663872521466, "epoch": 1412, "n_parameters": 303924416} {"train_lr": 0.00011036049669647565, "train_min_lr": 0.00011036049669647565, "train_loss": 0.26264498464894503, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018577451166959528, "epoch": 1413, "n_parameters": 303924416} {"train_lr": 0.0001092991858631544, "train_min_lr": 0.0001092991858631544, "train_loss": 0.2626170387569194, "train_loss_scale": 9084.71794871795, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1414, "n_parameters": 303924416} {"train_lr": 0.00010824332378993593, "train_min_lr": 0.00010824332378993593, "train_loss": 0.26260583208372384, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01866139174821094, "epoch": 1415, "n_parameters": 303924416} {"train_lr": 0.00010719291459886363, "train_min_lr": 0.00010719291459886363, "train_loss": 0.2625850519040026, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01974393435133, "epoch": 1416, "n_parameters": 303924416} {"train_lr": 0.0001061479623906931, "train_min_lr": 0.0001061479623906931, "train_loss": 0.2625507482697662, "train_loss_scale": 6577.2307692307695, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1417, "n_parameters": 303924416} {"train_lr": 0.0001051084712448757, "train_min_lr": 0.0001051084712448757, "train_loss": 0.26251817628657687, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018661609689633433, "epoch": 1418, "n_parameters": 303924416} {"train_lr": 0.00010407444521954368, "train_min_lr": 0.00010407444521954368, "train_loss": 0.2624779175519227, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01914448888661961, "epoch": 1419, "n_parameters": 303924416} {"train_lr": 0.00010202280465616816, "train_min_lr": 0.00010202280465616816, "train_loss": 0.26245798043106705, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.018841963989707906, "epoch": 1421, "n_parameters": 303924416} {"train_lr": 0.00010100519812764733, "train_min_lr": 0.00010100519812764733, "train_loss": 0.2624036082430767, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.019203126263351012, "epoch": 1422, "n_parameters": 303924416} {"train_lr": 9.999307273862563e-05, "train_min_lr": 9.999307273862563e-05, "train_loss": 0.26237050622391206, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01944540947293624, "epoch": 1423, "n_parameters": 303924416} {"train_lr": 9.898643244039997e-05, "train_min_lr": 9.898643244039997e-05, "train_loss": 0.26239387658185875, "train_loss_scale": 2435.2820512820513, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1424, "n_parameters": 303924416} {"train_lr": 9.79852811628539e-05, "train_min_lr": 9.79852811628539e-05, "train_loss": 0.26238329014561784, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01961483451155707, "epoch": 1425, "n_parameters": 303924416} {"train_lr": 9.698962281444164e-05, "train_min_lr": 9.698962281444164e-05, "train_loss": 0.262358902556559, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.019676293198687907, "epoch": 1426, "n_parameters": 303924416} {"train_lr": 9.599946128217389e-05, "train_min_lr": 9.599946128217389e-05, "train_loss": 0.26235864466080105, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.019892395611327045, "epoch": 1427, "n_parameters": 303924416} {"train_lr": 9.501480043160137e-05, "train_min_lr": 9.501480043160137e-05, "train_loss": 0.2623074979413874, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020032048261222932, "epoch": 1428, "n_parameters": 303924416} {"train_lr": 9.403564410680083e-05, "train_min_lr": 9.403564410680083e-05, "train_loss": 0.2622903587314515, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.019791553513361856, "epoch": 1429, "n_parameters": 303924416} {"train_lr": 9.306199613035916e-05, "train_min_lr": 9.306199613035916e-05, "train_loss": 0.26232303766748655, "train_loss_scale": 2868.5128205128203, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02120656622812534, "epoch": 1430, "n_parameters": 303924416} {"train_lr": 9.209386030335916e-05, "train_min_lr": 9.209386030335916e-05, "train_loss": 0.2622943233388166, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020053656783719093, "epoch": 1431, "n_parameters": 303924416} {"train_lr": 9.113124040536432e-05, "train_min_lr": 9.113124040536432e-05, "train_loss": 0.26223721387139404, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020960263889999345, "epoch": 1432, "n_parameters": 303924416} {"train_lr": 9.01741401944042e-05, "train_min_lr": 9.01741401944042e-05, "train_loss": 0.2622831072807551, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0213844217837621, "epoch": 1433, "n_parameters": 303924416} {"train_lr": 8.922256340695968e-05, "train_min_lr": 8.922256340695968e-05, "train_loss": 0.26225498986120027, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021130365492680516, "epoch": 1434, "n_parameters": 303924416} {"train_lr": 8.82765137579486e-05, "train_min_lr": 8.82765137579486e-05, "train_loss": 0.2622132492490495, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021239455198295988, "epoch": 1435, "n_parameters": 303924416} {"train_lr": 8.733599494071077e-05, "train_min_lr": 8.733599494071077e-05, "train_loss": 0.26220151553682697, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02113564183505682, "epoch": 1436, "n_parameters": 303924416} {"train_lr": 8.640101062699404e-05, "train_min_lr": 8.640101062699404e-05, "train_loss": 0.26218145564556694, "train_loss_scale": 8152.615384615385, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021175334630056452, "epoch": 1437, "n_parameters": 303924416} {"train_lr": 8.547156446693963e-05, "train_min_lr": 8.547156446693963e-05, "train_loss": 0.2621926837391817, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02185995627242403, "epoch": 1438, "n_parameters": 303924416} {"train_lr": 8.454766008906833e-05, "train_min_lr": 8.454766008906833e-05, "train_loss": 0.26215126363333696, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02107309428258584, "epoch": 1439, "n_parameters": 303924416} {"train_lr": 8.362930110026567e-05, "train_min_lr": 8.362930110026567e-05, "train_loss": 0.26207931723314315, "train_loss_scale": 4752.410256410257, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1440, "n_parameters": 303924416} {"train_lr": 8.271649108576848e-05, "train_min_lr": 8.271649108576848e-05, "train_loss": 0.26204149873676497, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020467297866558418, "epoch": 1441, "n_parameters": 303924416} {"train_lr": 8.180923360915051e-05, "train_min_lr": 8.180923360915051e-05, "train_loss": 0.26202396403413075, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.01979556534654246, "epoch": 1442, "n_parameters": 303924416} {"train_lr": 8.090753221230857e-05, "train_min_lr": 8.090753221230857e-05, "train_loss": 0.2619561936933165, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0199598046258474, "epoch": 1443, "n_parameters": 303924416} {"train_lr": 8.00113904154489e-05, "train_min_lr": 8.00113904154489e-05, "train_loss": 0.2619604434376248, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020379924639247548, "epoch": 1444, "n_parameters": 303924416} {"train_lr": 7.912081171707306e-05, "train_min_lr": 7.912081171707306e-05, "train_loss": 0.26198514241677445, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02143592094907012, "epoch": 1445, "n_parameters": 303924416} {"train_lr": 7.82357995939648e-05, "train_min_lr": 7.82357995939648e-05, "train_loss": 0.26194736362996107, "train_loss_scale": 5855.179487179487, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020723607809020158, "epoch": 1446, "n_parameters": 303924416} {"train_lr": 7.735635750117588e-05, "train_min_lr": 7.735635750117588e-05, "train_loss": 0.2619179085744784, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020334470975332152, "epoch": 1447, "n_parameters": 303924416} {"train_lr": 7.648248887201305e-05, "train_min_lr": 7.648248887201305e-05, "train_loss": 0.2618995786417658, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020814271930318613, "epoch": 1448, "n_parameters": 303924416} {"train_lr": 7.561419711802458e-05, "train_min_lr": 7.561419711802458e-05, "train_loss": 0.261868744616946, "train_loss_scale": 6091.48717948718, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1449, "n_parameters": 303924416} {"train_lr": 7.47514856289866e-05, "train_min_lr": 7.47514856289866e-05, "train_loss": 0.2618433110720406, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021730881960441668, "epoch": 1450, "n_parameters": 303924416} {"train_lr": 7.389435777289031e-05, "train_min_lr": 7.389435777289031e-05, "train_loss": 0.2618099019611971, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0206067781143177, "epoch": 1451, "n_parameters": 303924416} {"train_lr": 7.304281689592842e-05, "train_min_lr": 7.304281689592842e-05, "train_loss": 0.261805703305305, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021356401475480735, "epoch": 1452, "n_parameters": 303924416} {"train_lr": 7.219686632248242e-05, "train_min_lr": 7.219686632248242e-05, "train_loss": 0.26175230886870754, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02067475143270806, "epoch": 1453, "n_parameters": 303924416} {"train_lr": 7.13565093551097e-05, "train_min_lr": 7.13565093551097e-05, "train_loss": 0.26174613087175364, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021271219453177392, "epoch": 1454, "n_parameters": 303924416} {"train_lr": 7.052174927452995e-05, "train_min_lr": 7.052174927452995e-05, "train_loss": 0.26173776499807644, "train_loss_scale": 4516.102564102564, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021057071062760092, "epoch": 1455, "n_parameters": 303924416} {"train_lr": 6.969258933961333e-05, "train_min_lr": 6.969258933961333e-05, "train_loss": 0.261709175308426, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021177091910384405, "epoch": 1456, "n_parameters": 303924416} {"train_lr": 6.886903278736681e-05, "train_min_lr": 6.886903278736681e-05, "train_loss": 0.26167987952701366, "train_loss_scale": 4306.051282051282, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1457, "n_parameters": 303924416} {"train_lr": 6.805108283292237e-05, "train_min_lr": 6.805108283292237e-05, "train_loss": 0.26168352594742406, "train_loss_scale": 3945.025641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1458, "n_parameters": 303924416} {"train_lr": 6.723874266952386e-05, "train_min_lr": 6.723874266952386e-05, "train_loss": 0.26165738741819483, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021631942405246008, "epoch": 1459, "n_parameters": 303924416} {"train_lr": 6.643201546851466e-05, "train_min_lr": 6.643201546851466e-05, "train_loss": 0.2615928957089543, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0212782662994682, "epoch": 1460, "n_parameters": 303924416} {"train_lr": 6.563090437932561e-05, "train_min_lr": 6.563090437932561e-05, "train_loss": 0.261629278305918, "train_loss_scale": 1893.7435897435898, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1461, "n_parameters": 303924416} {"train_lr": 6.483541252946215e-05, "train_min_lr": 6.483541252946215e-05, "train_loss": 0.2615840851377027, "train_loss_scale": 1024.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021773985712430798, "epoch": 1462, "n_parameters": 303924416} {"train_lr": 6.40455430244928e-05, "train_min_lr": 6.40455430244928e-05, "train_loss": 0.2615540479369557, "train_loss_scale": 1024.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0214222482441423, "epoch": 1463, "n_parameters": 303924416} {"train_lr": 6.32612989480364e-05, "train_min_lr": 6.32612989480364e-05, "train_loss": 0.26157429318528813, "train_loss_scale": 1024.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021344071857105844, "epoch": 1464, "n_parameters": 303924416} {"train_lr": 6.248268336175046e-05, "train_min_lr": 6.248268336175046e-05, "train_loss": 0.26150643079875946, "train_loss_scale": 1024.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021445267027817093, "epoch": 1465, "n_parameters": 303924416} {"train_lr": 6.170969930531892e-05, "train_min_lr": 6.170969930531892e-05, "train_loss": 0.2615035195530464, "train_loss_scale": 1024.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022675657435917318, "epoch": 1466, "n_parameters": 303924416} {"train_lr": 6.0942349796440837e-05, "train_min_lr": 6.0942349796440837e-05, "train_loss": 0.2615054983359117, "train_loss_scale": 1024.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02313469192132545, "epoch": 1467, "n_parameters": 303924416} {"train_lr": 6.0180637830817734e-05, "train_min_lr": 6.0180637830817734e-05, "train_loss": 0.261472117039375, "train_loss_scale": 1782.1538461538462, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02184926466538738, "epoch": 1468, "n_parameters": 303924416} {"train_lr": 5.942456638214276e-05, "train_min_lr": 5.942456638214276e-05, "train_loss": 0.26145474447940403, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022693075072497893, "epoch": 1469, "n_parameters": 303924416} {"train_lr": 5.867413840208859e-05, "train_min_lr": 5.867413840208859e-05, "train_loss": 0.2614225108677951, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02106830880093651, "epoch": 1470, "n_parameters": 303924416} {"train_lr": 5.7929356820295953e-05, "train_min_lr": 5.7929356820295953e-05, "train_loss": 0.2613996906588093, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022032519402460028, "epoch": 1471, "n_parameters": 303924416} {"train_lr": 5.719022454436236e-05, "train_min_lr": 5.719022454436236e-05, "train_loss": 0.26134709452983373, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.020740699858810656, "epoch": 1472, "n_parameters": 303924416} {"train_lr": 5.645674445983068e-05, "train_min_lr": 5.645674445983068e-05, "train_loss": 0.2613093206503739, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02120037994777354, "epoch": 1473, "n_parameters": 303924416} {"train_lr": 5.572891943017771e-05, "train_min_lr": 5.572891943017771e-05, "train_loss": 0.26134281550856453, "train_loss_scale": 2724.102564102564, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021857754787048086, "epoch": 1474, "n_parameters": 303924416} {"train_lr": 5.500675229680325e-05, "train_min_lr": 5.500675229680325e-05, "train_loss": 0.26130678494257903, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021350824083082188, "epoch": 1475, "n_parameters": 303924416} {"train_lr": 5.42902458790189e-05, "train_min_lr": 5.42902458790189e-05, "train_loss": 0.2612570393203686, "train_loss_scale": 2953.846153846154, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1476, "n_parameters": 303924416} {"train_lr": 5.357940297403706e-05, "train_min_lr": 5.357940297403706e-05, "train_loss": 0.2612174362935221, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021014101325701445, "epoch": 1477, "n_parameters": 303924416} {"train_lr": 5.287422635695986e-05, "train_min_lr": 5.287422635695986e-05, "train_loss": 0.2612201498839288, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021515458565539658, "epoch": 1478, "n_parameters": 303924416} {"train_lr": 5.217471878076868e-05, "train_min_lr": 5.217471878076868e-05, "train_loss": 0.2611781002428287, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021337609344090406, "epoch": 1479, "n_parameters": 303924416} {"train_lr": 5.148088297631303e-05, "train_min_lr": 5.148088297631303e-05, "train_loss": 0.26120600551784706, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02153914405677754, "epoch": 1480, "n_parameters": 303924416} {"train_lr": 5.079272165230025e-05, "train_min_lr": 5.079272165230025e-05, "train_loss": 0.26112735038026214, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021280339441429347, "epoch": 1481, "n_parameters": 303924416} {"train_lr": 5.0110237495284404e-05, "train_min_lr": 5.0110237495284404e-05, "train_loss": 0.2611300592692808, "train_loss_scale": 2349.948717948718, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021751384232909635, "epoch": 1482, "n_parameters": 303924416} {"train_lr": 4.943343316965651e-05, "train_min_lr": 4.943343316965651e-05, "train_loss": 0.2611212151972816, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02206206942598025, "epoch": 1483, "n_parameters": 303924416} {"train_lr": 4.8762311317633326e-05, "train_min_lr": 4.8762311317633326e-05, "train_loss": 0.26110263667905176, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022837725408279743, "epoch": 1484, "n_parameters": 303924416} {"train_lr": 4.809687455924794e-05, "train_min_lr": 4.809687455924794e-05, "train_loss": 0.26107366184572667, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021983383683105692, "epoch": 1485, "n_parameters": 303924416} {"train_lr": 4.743712549233872e-05, "train_min_lr": 4.743712549233872e-05, "train_loss": 0.26106212833203757, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.021881258252482765, "epoch": 1486, "n_parameters": 303924416} {"train_lr": 4.678306669253953e-05, "train_min_lr": 4.678306669253953e-05, "train_loss": 0.261030115136423, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02221265076420819, "epoch": 1487, "n_parameters": 303924416} {"train_lr": 4.6134700713269854e-05, "train_min_lr": 4.6134700713269854e-05, "train_loss": 0.2610004441454433, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022174194204406097, "epoch": 1488, "n_parameters": 303924416} {"train_lr": 4.549203008572446e-05, "train_min_lr": 4.549203008572446e-05, "train_loss": 0.26101797312880176, "train_loss_scale": 7115.48717948718, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022399514955349076, "epoch": 1489, "n_parameters": 303924416} {"train_lr": 4.485505731886384e-05, "train_min_lr": 4.485505731886384e-05, "train_loss": 0.26100071652744633, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023145656441696562, "epoch": 1490, "n_parameters": 303924416} {"train_lr": 4.4223784899403976e-05, "train_min_lr": 4.4223784899403976e-05, "train_loss": 0.2609838002588218, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02304126453012801, "epoch": 1491, "n_parameters": 303924416} {"train_lr": 4.3598215291807275e-05, "train_min_lr": 4.3598215291807275e-05, "train_loss": 0.26092460583179045, "train_loss_scale": 8192.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02219855005088716, "epoch": 1492, "n_parameters": 303924416} {"train_lr": 4.2978350938272475e-05, "train_min_lr": 4.2978350938272475e-05, "train_loss": 0.2609092819217879, "train_loss_scale": 8086.974358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1493, "n_parameters": 303924416} {"train_lr": 4.23641942587251e-05, "train_min_lr": 4.23641942587251e-05, "train_loss": 0.2609551517465988, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02342074391885828, "epoch": 1494, "n_parameters": 303924416} {"train_lr": 4.175574765080827e-05, "train_min_lr": 4.175574765080827e-05, "train_loss": 0.26092856261544883, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023901506177842233, "epoch": 1495, "n_parameters": 303924416} {"train_lr": 4.1153013489873176e-05, "train_min_lr": 4.1153013489873176e-05, "train_loss": 0.26087551231746775, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022384971947385333, "epoch": 1496, "n_parameters": 303924416} {"train_lr": 4.055599412896989e-05, "train_min_lr": 4.055599412896989e-05, "train_loss": 0.2608675496169151, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023154460616075456, "epoch": 1497, "n_parameters": 303924416} {"train_lr": 3.9964691898838054e-05, "train_min_lr": 3.9964691898838054e-05, "train_loss": 0.26083272104808247, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022059342441841576, "epoch": 1498, "n_parameters": 303924416} {"train_lr": 3.937910910789793e-05, "train_min_lr": 3.937910910789793e-05, "train_loss": 0.2608424096714514, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022723336232444033, "epoch": 1499, "n_parameters": 303924416} {"train_lr": 3.822511096562247e-05, "train_min_lr": 3.822511096562247e-05, "train_loss": 0.26076333368352306, "train_loss_scale": 6616.615384615385, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022635430252800386, "epoch": 1501, "n_parameters": 303924416} {"train_lr": 3.7656700119449665e-05, "train_min_lr": 3.7656700119449665e-05, "train_loss": 0.2607875407959979, "train_loss_scale": 7049.846153846154, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1502, "n_parameters": 303924416} {"train_lr": 3.709401772277616e-05, "train_min_lr": 3.709401772277616e-05, "train_loss": 0.26071953726036906, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02174474932372761, "epoch": 1503, "n_parameters": 303924416} {"train_lr": 3.6537065972291316e-05, "train_min_lr": 3.6537065972291316e-05, "train_loss": 0.26073500814322287, "train_loss_scale": 3761.230769230769, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1504, "n_parameters": 303924416} {"train_lr": 3.598584704231254e-05, "train_min_lr": 3.598584704231254e-05, "train_loss": 0.2607193334141555, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02264251517585646, "epoch": 1505, "n_parameters": 303924416} {"train_lr": 3.544036308477659e-05, "train_min_lr": 3.544036308477659e-05, "train_loss": 0.26069496002478093, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022084308358339164, "epoch": 1506, "n_parameters": 303924416} {"train_lr": 3.4900616229230846e-05, "train_min_lr": 3.4900616229230846e-05, "train_loss": 0.2606770774970452, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02338145406415256, "epoch": 1507, "n_parameters": 303924416} {"train_lr": 3.436660858282553e-05, "train_min_lr": 3.436660858282553e-05, "train_loss": 0.26067187883652365, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022839989328088287, "epoch": 1508, "n_parameters": 303924416} {"train_lr": 3.383834223030501e-05, "train_min_lr": 3.383834223030501e-05, "train_loss": 0.26065549623364437, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022810732623418935, "epoch": 1509, "n_parameters": 303924416} {"train_lr": 3.331581923400004e-05, "train_min_lr": 3.331581923400004e-05, "train_loss": 0.2606495884993376, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023567306868827496, "epoch": 1510, "n_parameters": 303924416} {"train_lr": 3.2799041633819495e-05, "train_min_lr": 3.2799041633819495e-05, "train_loss": 0.26059002799387926, "train_loss_scale": 3590.5641025641025, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02297450117289256, "epoch": 1511, "n_parameters": 303924416} {"train_lr": 3.228801144724241e-05, "train_min_lr": 3.228801144724241e-05, "train_loss": 0.26062421820377213, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02311638610747953, "epoch": 1512, "n_parameters": 303924416} {"train_lr": 3.178273066931021e-05, "train_min_lr": 3.178273066931021e-05, "train_loss": 0.2605642335674463, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022722393901158985, "epoch": 1513, "n_parameters": 303924416} {"train_lr": 3.12832012726187e-05, "train_min_lr": 3.12832012726187e-05, "train_loss": 0.26055068012553817, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.022699078747954887, "epoch": 1514, "n_parameters": 303924416} {"train_lr": 3.078942520731082e-05, "train_min_lr": 3.078942520731082e-05, "train_loss": 0.2605410462830407, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02303600099343711, "epoch": 1515, "n_parameters": 303924416} {"train_lr": 3.030140440106846e-05, "train_min_lr": 3.030140440106846e-05, "train_loss": 0.26051174730169946, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02307509813004006, "epoch": 1516, "n_parameters": 303924416} {"train_lr": 2.981914075910532e-05, "train_min_lr": 2.981914075910532e-05, "train_loss": 0.26052236278505564, "train_loss_scale": 5500.717948717948, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024003403601594843, "epoch": 1517, "n_parameters": 303924416} {"train_lr": 2.93426361641594e-05, "train_min_lr": 2.93426361641594e-05, "train_loss": 0.26050258001002174, "train_loss_scale": 4201.025641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1518, "n_parameters": 303924416} {"train_lr": 2.8871892476485508e-05, "train_min_lr": 2.8871892476485508e-05, "train_loss": 0.2604490953694599, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02350543178498554, "epoch": 1519, "n_parameters": 303924416} {"train_lr": 2.794769515151437e-05, "train_min_lr": 2.794769515151437e-05, "train_loss": 0.26044492630395466, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02311506183841863, "epoch": 1521, "n_parameters": 303924416} {"train_lr": 2.7494245122246477e-05, "train_min_lr": 2.7494245122246477e-05, "train_loss": 0.2604651591883829, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023366782581433654, "epoch": 1522, "n_parameters": 303924416} {"train_lr": 2.7046563216295282e-05, "train_min_lr": 2.7046563216295282e-05, "train_loss": 0.2604328122300406, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02312847348049474, "epoch": 1523, "n_parameters": 303924416} {"train_lr": 2.660465118139297e-05, "train_min_lr": 2.660465118139297e-05, "train_loss": 0.2603811016903283, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023829043556291323, "epoch": 1524, "n_parameters": 303924416} {"train_lr": 2.6168510742746464e-05, "train_min_lr": 2.6168510742746464e-05, "train_loss": 0.26038031579138565, "train_loss_scale": 6406.5641025641025, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024120470807433892, "epoch": 1525, "n_parameters": 303924416} {"train_lr": 2.573814360303059e-05, "train_min_lr": 2.573814360303059e-05, "train_loss": 0.26037009260975397, "train_loss_scale": 7338.666666666667, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1526, "n_parameters": 303924416} {"train_lr": 2.5313551442381402e-05, "train_min_lr": 2.5313551442381402e-05, "train_loss": 0.26037582110924024, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024779702106920574, "epoch": 1527, "n_parameters": 303924416} {"train_lr": 2.489473591838974e-05, "train_min_lr": 2.489473591838974e-05, "train_loss": 0.26034745745635474, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02473643070492798, "epoch": 1528, "n_parameters": 303924416} {"train_lr": 2.4481698666094585e-05, "train_min_lr": 2.4481698666094585e-05, "train_loss": 0.2603621508430642, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024847611570014402, "epoch": 1529, "n_parameters": 303924416} {"train_lr": 2.4074441297976873e-05, "train_min_lr": 2.4074441297976873e-05, "train_loss": 0.2603255005625005, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023302261120615862, "epoch": 1530, "n_parameters": 303924416} {"train_lr": 2.3672965403953075e-05, "train_min_lr": 2.3672965403953075e-05, "train_loss": 0.2602847720741127, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023909661936788604, "epoch": 1531, "n_parameters": 303924416} {"train_lr": 2.327727255136899e-05, "train_min_lr": 2.327727255136899e-05, "train_loss": 0.26027170660642857, "train_loss_scale": 3754.6666666666665, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1532, "n_parameters": 303924416} {"train_lr": 2.2887364284993653e-05, "train_min_lr": 2.2887364284993653e-05, "train_loss": 0.2602679981515767, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02440222297221995, "epoch": 1533, "n_parameters": 303924416} {"train_lr": 2.2503242127013326e-05, "train_min_lr": 2.2503242127013326e-05, "train_loss": 0.2602560719749771, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023788557155057788, "epoch": 1534, "n_parameters": 303924416} {"train_lr": 2.2124907577025608e-05, "train_min_lr": 2.2124907577025608e-05, "train_loss": 0.26026415856531226, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02400090254079073, "epoch": 1535, "n_parameters": 303924416} {"train_lr": 2.175236211203337e-05, "train_min_lr": 2.175236211203337e-05, "train_loss": 0.260241797229705, "train_loss_scale": 1545.8461538461538, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1536, "n_parameters": 303924416} {"train_lr": 2.13856071864392e-05, "train_min_lr": 2.13856071864392e-05, "train_loss": 0.26020569807718485, "train_loss_scale": 1024.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024612510152973045, "epoch": 1537, "n_parameters": 303924416} {"train_lr": 2.1024644232039657e-05, "train_min_lr": 2.1024644232039657e-05, "train_loss": 0.260242767778273, "train_loss_scale": 1024.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023794471423547618, "epoch": 1538, "n_parameters": 303924416} {"train_lr": 2.0669474658019664e-05, "train_min_lr": 2.0669474658019664e-05, "train_loss": 0.2602106561460413, "train_loss_scale": 1024.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.023383339240144078, "epoch": 1539, "n_parameters": 303924416} {"train_lr": 2.032009985094699e-05, "train_min_lr": 2.032009985094699e-05, "train_loss": 0.26019147713668644, "train_loss_scale": 1024.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02359718310001951, "epoch": 1540, "n_parameters": 303924416} {"train_lr": 1.9976521174766968e-05, "train_min_lr": 1.9976521174766968e-05, "train_loss": 0.2601850144415855, "train_loss_scale": 1024.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0235208950829334, "epoch": 1541, "n_parameters": 303924416} {"train_lr": 1.963873997079691e-05, "train_min_lr": 1.963873997079691e-05, "train_loss": 0.26014989289120793, "train_loss_scale": 1106.051282051282, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024162523633537766, "epoch": 1542, "n_parameters": 303924416} {"train_lr": 1.930675755772116e-05, "train_min_lr": 1.930675755772116e-05, "train_loss": 0.2601350737043107, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02453171549579845, "epoch": 1543, "n_parameters": 303924416} {"train_lr": 1.8980575231585747e-05, "train_min_lr": 1.8980575231585747e-05, "train_loss": 0.26014245787742907, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024387245723165762, "epoch": 1544, "n_parameters": 303924416} {"train_lr": 1.8660194265793465e-05, "train_min_lr": 1.8660194265793465e-05, "train_loss": 0.2601275180693334, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.025089627812401608, "epoch": 1545, "n_parameters": 303924416} {"train_lr": 1.8345615911098684e-05, "train_min_lr": 1.8345615911098684e-05, "train_loss": 0.26014968992557186, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0249177449896263, "epoch": 1546, "n_parameters": 303924416} {"train_lr": 1.803684139560281e-05, "train_min_lr": 1.803684139560281e-05, "train_loss": 0.26012585275710964, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.025027807073619884, "epoch": 1547, "n_parameters": 303924416} {"train_lr": 1.773387192474912e-05, "train_min_lr": 1.773387192474912e-05, "train_loss": 0.2600874594376924, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024343622675261054, "epoch": 1548, "n_parameters": 303924416} {"train_lr": 1.743670868131832e-05, "train_min_lr": 1.743670868131832e-05, "train_loss": 0.26008934403459233, "train_loss_scale": 3419.897435897436, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024666867105481334, "epoch": 1549, "n_parameters": 303924416} {"train_lr": 1.7145352825423828e-05, "train_min_lr": 1.7145352825423828e-05, "train_loss": 0.2600446853470296, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02495406067166, "epoch": 1550, "n_parameters": 303924416} {"train_lr": 1.68598054945072e-05, "train_min_lr": 1.68598054945072e-05, "train_loss": 0.26005919796760935, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0251682468284017, "epoch": 1551, "n_parameters": 303924416} {"train_lr": 1.6580067803333854e-05, "train_min_lr": 1.6580067803333854e-05, "train_loss": 0.2600838491687169, "train_loss_scale": 3780.923076923077, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1552, "n_parameters": 303924416} {"train_lr": 1.6306140843988466e-05, "train_min_lr": 1.6306140843988466e-05, "train_loss": 0.2600794965192341, "train_loss_scale": 1024.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.025107456538348626, "epoch": 1553, "n_parameters": 303924416} {"train_lr": 1.6038025685870983e-05, "train_min_lr": 1.6038025685870983e-05, "train_loss": 0.2600706835540059, "train_loss_scale": 1024.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.025337366554408502, "epoch": 1554, "n_parameters": 303924416} {"train_lr": 1.5775723375692182e-05, "train_min_lr": 1.5775723375692182e-05, "train_loss": 0.26003848224914133, "train_loss_scale": 1024.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.026264051411253136, "epoch": 1555, "n_parameters": 303924416} {"train_lr": 1.5519234937469837e-05, "train_min_lr": 1.5519234937469837e-05, "train_loss": 0.2600518086125167, "train_loss_scale": 1024.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024835841252635688, "epoch": 1556, "n_parameters": 303924416} {"train_lr": 1.5268561372524495e-05, "train_min_lr": 1.5268561372524495e-05, "train_loss": 0.26004020161090946, "train_loss_scale": 1024.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027971780238052208, "epoch": 1557, "n_parameters": 303924416} {"train_lr": 1.5023703659475781e-05, "train_min_lr": 1.5023703659475781e-05, "train_loss": 0.25996728914264494, "train_loss_scale": 1024.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02472160572114472, "epoch": 1558, "n_parameters": 303924416} {"train_lr": 1.478466275423835e-05, "train_min_lr": 1.478466275423835e-05, "train_loss": 0.25998676414840305, "train_loss_scale": 1700.1025641025642, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02644890411876333, "epoch": 1559, "n_parameters": 303924416} {"train_lr": 1.455143959001833e-05, "train_min_lr": 1.455143959001833e-05, "train_loss": 0.25998200489303624, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0263353118326706, "epoch": 1560, "n_parameters": 303924416} {"train_lr": 1.432403507730965e-05, "train_min_lr": 1.432403507730965e-05, "train_loss": 0.2599711386391368, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02522341752400956, "epoch": 1561, "n_parameters": 303924416} {"train_lr": 1.41024501038904e-05, "train_min_lr": 1.41024501038904e-05, "train_loss": 0.26000218556370014, "train_loss_scale": 1499.8974358974358, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1562, "n_parameters": 303924416} {"train_lr": 1.388668553481944e-05, "train_min_lr": 1.388668553481944e-05, "train_loss": 0.25995667839351183, "train_loss_scale": 1024.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02508338880486404, "epoch": 1563, "n_parameters": 303924416} {"train_lr": 1.3676742212433047e-05, "train_min_lr": 1.3676742212433047e-05, "train_loss": 0.2599614565380109, "train_loss_scale": 1024.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02629964467949974, "epoch": 1564, "n_parameters": 303924416} {"train_lr": 1.3472620956341499e-05, "train_min_lr": 1.3472620956341499e-05, "train_loss": 0.25996072636618733, "train_loss_scale": 1024.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024182508126474343, "epoch": 1565, "n_parameters": 303924416} {"train_lr": 1.3274322563426021e-05, "train_min_lr": 1.3274322563426021e-05, "train_loss": 0.2599416238894591, "train_loss_scale": 1024.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02509768839734487, "epoch": 1566, "n_parameters": 303924416} {"train_lr": 1.3081847807835623e-05, "train_min_lr": 1.3081847807835623e-05, "train_loss": 0.2599411301840192, "train_loss_scale": 1024.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.025580326811625406, "epoch": 1567, "n_parameters": 303924416} {"train_lr": 1.2895197440984016e-05, "train_min_lr": 1.2895197440984016e-05, "train_loss": 0.2599595380284322, "train_loss_scale": 1152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024713251775560472, "epoch": 1568, "n_parameters": 303924416} {"train_lr": 1.2714372191546779e-05, "train_min_lr": 1.2714372191546779e-05, "train_loss": 0.2598700027894945, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.025855309747828122, "epoch": 1569, "n_parameters": 303924416} {"train_lr": 1.2539372765458446e-05, "train_min_lr": 1.2539372765458446e-05, "train_loss": 0.25988743061390823, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02555372789263343, "epoch": 1570, "n_parameters": 303924416} {"train_lr": 1.2370199845909771e-05, "train_min_lr": 1.2370199845909771e-05, "train_loss": 0.25990785470005506, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024975287936962187, "epoch": 1571, "n_parameters": 303924416} {"train_lr": 1.2206854093345032e-05, "train_min_lr": 1.2206854093345032e-05, "train_loss": 0.2598884429675169, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.025613430994921006, "epoch": 1572, "n_parameters": 303924416} {"train_lr": 1.2049336145459547e-05, "train_min_lr": 1.2049336145459547e-05, "train_loss": 0.25983513654985774, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.025408555371448014, "epoch": 1573, "n_parameters": 303924416} {"train_lr": 1.1897646617197056e-05, "train_min_lr": 1.1897646617197056e-05, "train_loss": 0.2598821016488215, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.026166373839936197, "epoch": 1574, "n_parameters": 303924416} {"train_lr": 1.1751786100747415e-05, "train_min_lr": 1.1751786100747415e-05, "train_loss": 0.2598780093517584, "train_loss_scale": 3511.7948717948716, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.026156023974554278, "epoch": 1575, "n_parameters": 303924416} {"train_lr": 1.1611755165544217e-05, "train_min_lr": 1.1611755165544217e-05, "train_loss": 0.25985637097917974, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.025245012590088524, "epoch": 1576, "n_parameters": 303924416} {"train_lr": 1.147755435826266e-05, "train_min_lr": 1.147755435826266e-05, "train_loss": 0.2598499802526278, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.024906960918974035, "epoch": 1577, "n_parameters": 303924416} {"train_lr": 1.1349184202817314e-05, "train_min_lr": 1.1349184202817314e-05, "train_loss": 0.2598532831075434, "train_loss_scale": 2356.5128205128203, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1578, "n_parameters": 303924416} {"train_lr": 1.1226645200360109e-05, "train_min_lr": 1.1226645200360109e-05, "train_loss": 0.25982652734726286, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.027388259046114027, "epoch": 1579, "n_parameters": 303924416} {"train_lr": 1.1109937829278423e-05, "train_min_lr": 1.1109937829278423e-05, "train_loss": 0.25983129935756993, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02527214931204724, "epoch": 1580, "n_parameters": 303924416} {"train_lr": 1.0999062545193157e-05, "train_min_lr": 1.0999062545193157e-05, "train_loss": 0.25981224922594637, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.026218293169035744, "epoch": 1581, "n_parameters": 303924416} {"train_lr": 1.0894019780956976e-05, "train_min_lr": 1.0894019780956976e-05, "train_loss": 0.2598492906291563, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.025853835076141436, "epoch": 1582, "n_parameters": 303924416} {"train_lr": 1.0794809946652626e-05, "train_min_lr": 1.0794809946652626e-05, "train_loss": 0.2598181154626684, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02611683740113408, "epoch": 1583, "n_parameters": 303924416} {"train_lr": 1.0701433429591352e-05, "train_min_lr": 1.0701433429591352e-05, "train_loss": 0.2598197017557537, "train_loss_scale": 2947.2820512820513, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0269251538034624, "epoch": 1584, "n_parameters": 303924416} {"train_lr": 1.0613890594311302e-05, "train_min_lr": 1.0613890594311302e-05, "train_loss": 0.25978895701850074, "train_loss_scale": 4096.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.025670343001062672, "epoch": 1585, "n_parameters": 303924416} {"train_lr": 1.05321817825762e-05, "train_min_lr": 1.05321817825762e-05, "train_loss": 0.2598066193929229, "train_loss_scale": 2323.6923076923076, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1586, "n_parameters": 303924416} {"train_lr": 1.0456307313374012e-05, "train_min_lr": 1.0456307313374012e-05, "train_loss": 0.2598205640828476, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.025425394310448796, "epoch": 1587, "n_parameters": 303924416} {"train_lr": 1.0386267482915607e-05, "train_min_lr": 1.0386267482915607e-05, "train_loss": 0.2597861552330403, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.025778245133085128, "epoch": 1588, "n_parameters": 303924416} {"train_lr": 1.032206256463369e-05, "train_min_lr": 1.032206256463369e-05, "train_loss": 0.25979745275197696, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.025694748589721244, "epoch": 1589, "n_parameters": 303924416} {"train_lr": 1.0263692809181657e-05, "train_min_lr": 1.0263692809181657e-05, "train_loss": 0.2597649503015698, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.026049917892146952, "epoch": 1590, "n_parameters": 303924416} {"train_lr": 1.0211158444432734e-05, "train_min_lr": 1.0211158444432734e-05, "train_loss": 0.259787910996984, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.026461095889457144, "epoch": 1591, "n_parameters": 303924416} {"train_lr": 1.0164459675478965e-05, "train_min_lr": 1.0164459675478965e-05, "train_loss": 0.2597645598057753, "train_loss_scale": 2402.4615384615386, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1592, "n_parameters": 303924416} {"train_lr": 1.0123596684630482e-05, "train_min_lr": 1.0123596684630482e-05, "train_loss": 0.2597629513281087, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02679715130645304, "epoch": 1593, "n_parameters": 303924416} {"train_lr": 1.008856963141474e-05, "train_min_lr": 1.008856963141474e-05, "train_loss": 0.2597793157881078, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02661537051272507, "epoch": 1594, "n_parameters": 303924416} {"train_lr": 1.0059378652576001e-05, "train_min_lr": 1.0059378652576001e-05, "train_loss": 0.2598068847900065, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02549564276033869, "epoch": 1595, "n_parameters": 303924416} {"train_lr": 1.0036023862074616e-05, "train_min_lr": 1.0036023862074616e-05, "train_loss": 0.25974610979812074, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.02649570723923926, "epoch": 1596, "n_parameters": 303924416} {"train_lr": 1.0018505351086794e-05, "train_min_lr": 1.0018505351086794e-05, "train_loss": 0.25976181417130506, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.026040367698535707, "epoch": 1597, "n_parameters": 303924416} {"train_lr": 1.00068231880041e-05, "train_min_lr": 1.00068231880041e-05, "train_loss": 0.2597546549036335, "train_loss_scale": 2048.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.026451420373259447, "epoch": 1598, "n_parameters": 303924416} {"train_lr": 1.0000977418433219e-05, "train_min_lr": 1.0000977418433219e-05, "train_loss": 0.25974946196835774, "train_loss_scale": 3590.5641025641025, "train_weight_decay": 0.050000000000000266, "train_grad_norm": NaN, "epoch": 1599, "n_parameters": 303924416}